1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* This file implements randomized texture blit tests. */
8
9 #include "si_pipe.h"
10 #include "util/rand_xor.h"
11 #include "util/u_surface.h"
12 #include "amd/addrlib/inc/addrtypes.h"
13
/* PRNG state for generating random pixel contents (see util/rand_xor). */
static uint64_t seed_xorshift128plus[2];

/* Random pixel data is written 8 bytes (one uint64_t) at a time. */
#define RAND_NUM_SIZE 8
17
18 /* The GPU blits are emulated on the CPU using these CPU textures. */
19
struct cpu_texture {
   uint8_t *ptr;          /* CPU copy of all layers of one mip level */
   uint64_t size;         /* total allocation size in bytes */
   uint64_t layer_stride; /* bytes between consecutive layers */
   unsigned stride;       /* bytes per row of blocks, aligned to RAND_NUM_SIZE */
};
26
alloc_cpu_texture(struct cpu_texture * tex,struct pipe_resource * templ,unsigned level)27 static void alloc_cpu_texture(struct cpu_texture *tex, struct pipe_resource *templ, unsigned level)
28 {
29 unsigned width = u_minify(templ->width0, level);
30 unsigned height = u_minify(templ->height0, level);
31
32 tex->stride = align(util_format_get_stride(templ->format, width), RAND_NUM_SIZE);
33 tex->layer_stride = util_format_get_2d_size(templ->format, tex->stride, height);
34 tex->size = tex->layer_stride * util_num_layers(templ, level);
35 tex->ptr = malloc(tex->size);
36 assert(tex->ptr);
37 }
38
/* Fill one mip level of the GPU texture "tex" and its CPU shadow copy "cpu"
 * with identical random data, written 8 bytes at a time.
 *
 * NOTE(review): each row writes cpu->stride / RAND_NUM_SIZE uint64_t's through
 * both pointers, so this assumes the mapped GPU row stride is >= the aligned
 * CPU stride — TODO confirm (only divisibility by RAND_NUM_SIZE is asserted).
 */
static void set_random_pixels(struct pipe_context *ctx, struct pipe_resource *tex,
                              struct cpu_texture *cpu, unsigned level)
{
   struct pipe_transfer *t;
   uint8_t *map;
   int x, y, z;
   unsigned width = u_minify(tex->width0, level);
   unsigned height = u_minify(tex->height0, level);
   unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
   unsigned num_layers = util_num_layers(tex, level);

   map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_WRITE, 0, 0, 0, width, height,
                             num_layers, &t);
   assert(map);

   for (z = 0; z < num_layers; z++) {
      for (y = 0; y < num_y_blocks; y++) {
         uint64_t *ptr = (uint64_t *)(map + t->layer_stride * z + t->stride * y);
         uint64_t *ptr_cpu = (uint64_t *)(cpu->ptr + cpu->layer_stride * z + cpu->stride * y);
         unsigned size = cpu->stride / RAND_NUM_SIZE;

         assert(t->stride % RAND_NUM_SIZE == 0);
         assert(cpu->stride % RAND_NUM_SIZE == 0);

         /* Write the same random value into the GPU mapping and the CPU copy. */
         for (x = 0; x < size; x++) {
            *ptr++ = *ptr_cpu++ = rand_xorshift128plus(seed_xorshift128plus);
         }
      }
   }

   pipe_texture_unmap(ctx, t);
}
71
set_random_pixels_for_2_textures(struct pipe_context * ctx,struct pipe_resource * tex1,struct pipe_resource * tex2)72 static void set_random_pixels_for_2_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
73 struct pipe_resource *tex2)
74 {
75 /* tex1 and tex2 are assumed to be the same size, format, and layout */
76 for (unsigned level = 0; level <= tex1->last_level; level++) {
77 for (unsigned sample = 0; sample < MAX2(tex1->nr_samples, 1); sample++) {
78 struct pipe_transfer *t1, *t2;
79 uint8_t *map1, *map2;
80 int x, y, z;
81 unsigned width = align(u_minify(tex1->width0, level), util_format_get_blockwidth(tex1->format));
82 unsigned height = align(u_minify(tex1->height0, level), util_format_get_blockheight(tex1->format));
83 unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
84 unsigned num_layers = util_num_layers(tex1, level);
85 /* If we set level to sample + 1, we will only upload that sample instead of
86 * overwriting all samples.
87 */
88 unsigned level_or_sample = tex1->nr_samples > 1 ? sample + 1 : level;
89
90 map1 = pipe_texture_map_3d(ctx, tex1, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
91 num_layers, &t1);
92 map2 = pipe_texture_map_3d(ctx, tex2, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
93 num_layers, &t2);
94 assert(map1 && map2);
95 assert(t1->stride == t2->stride);
96
97 for (z = 0; z < num_layers; z++) {
98 for (y = 0; y < num_y_blocks; y++) {
99 uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
100 uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
101 unsigned size = t1->stride / 8;
102
103 assert(t1->stride % 8 == 0);
104 assert(t2->stride % 8 == 0);
105
106 for (x = 0; x < size; x++) {
107 *ptr1++ = *ptr2++ = rand_xorshift128plus(seed_xorshift128plus);
108 }
109 }
110 }
111
112 pipe_texture_unmap(ctx, t1);
113 pipe_texture_unmap(ctx, t2);
114 }
115 }
116 }
117
compare_textures(struct pipe_context * ctx,struct pipe_resource * tex,struct cpu_texture * cpu,unsigned level)118 static bool compare_textures(struct pipe_context *ctx, struct pipe_resource *tex,
119 struct cpu_texture *cpu, unsigned level)
120 {
121 struct pipe_transfer *t;
122 uint8_t *map;
123 int y, z;
124 bool pass = true;
125 unsigned width = u_minify(tex->width0, level);
126 unsigned height = u_minify(tex->height0, level);
127 unsigned stride = util_format_get_stride(tex->format, width);
128 unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
129 unsigned num_layers = util_num_layers(tex, level);
130
131 map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_READ, 0, 0, 0, width, height,
132 num_layers, &t);
133 assert(map);
134
135 for (z = 0; z < num_layers; z++) {
136 for (y = 0; y < num_y_blocks; y++) {
137 uint8_t *ptr = map + t->layer_stride * z + t->stride * y;
138 uint8_t *cpu_ptr = cpu->ptr + cpu->layer_stride * z + cpu->stride * y;
139
140 if (memcmp(ptr, cpu_ptr, stride)) {
141 pass = false;
142 goto done;
143 }
144 }
145 }
146 done:
147 pipe_texture_unmap(ctx, t);
148 return pass;
149 }
150
compare_gpu_textures(struct pipe_context * ctx,struct pipe_resource * tex1,struct pipe_resource * tex2)151 static bool compare_gpu_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
152 struct pipe_resource *tex2)
153 {
154 /* tex1 and tex2 are assumed to be the same size, format, and layout */
155 for (unsigned level = 0; level <= tex1->last_level; level++) {
156 struct pipe_transfer *t1, *t2;
157 uint8_t *map1, *map2;
158 unsigned width = u_minify(tex1->width0, level);
159 unsigned height = u_minify(tex1->height0, level);
160 unsigned stride = util_format_get_stride(tex1->format, width);
161 unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
162 unsigned num_layers = util_num_layers(tex1, level);
163
164 map1 = pipe_texture_map_3d(ctx, tex1, level, PIPE_MAP_READ, 0, 0, 0, width, height,
165 num_layers, &t1);
166 map2 = pipe_texture_map_3d(ctx, tex2, level, PIPE_MAP_READ, 0, 0, 0, width, height,
167 num_layers, &t2);
168 assert(map1 && map2);
169 assert(t1->stride == t2->stride);
170
171 for (unsigned z = 0; z < num_layers; z++) {
172 for (unsigned y = 0; y < num_y_blocks; y++) {
173 uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
174 uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
175
176 assert(t1->stride % 8 == 0);
177 assert(t2->stride % 8 == 0);
178
179 if (memcmp(ptr1, ptr2, stride)) {
180 pipe_texture_unmap(ctx, t1);
181 pipe_texture_unmap(ctx, t2);
182 return false;
183 }
184 }
185 }
186
187 pipe_texture_unmap(ctx, t1);
188 pipe_texture_unmap(ctx, t2);
189 }
190
191 return true;
192 }
193
/* Constraints for get_random_format(). */
struct si_format_options {
   bool only_resolve;     /* restrict to formats usable by CB MSAA resolving (no Z/S, no integers) */
   bool allow_float;      /* allow floating-point (and 128-bit) formats */
   bool allow_unorm16;    /* allow 16-bit unorm channel formats */
   bool allow_srgb;       /* allow sRGB colorspace formats */
   bool allow_x_channels; /* allow formats with undefined (X) channels */
   bool allow_subsampled; /* allow subsampled (e.g. packed YUV-style) layouts */
   bool allow_compressed; /* allow block-compressed formats */
};
203
/* Pick a random pipe format satisfying "options" and the compatibility
 * constraints given by the other parameters, by rejection sampling over all
 * pipe formats until one is supported by the screen.
 *
 * NOTE(review): this loops forever if no format can satisfy all constraints;
 * the constraint combinations used by the callers are assumed to always have
 * at least one supported match.
 */
static enum pipe_format get_random_format(struct si_screen *sscreen, bool render_target,
                                          enum pipe_format color_or_zs, /* must be color or Z/S */
                                          enum pipe_format res_format, /* must have the same bpp */
                                          enum pipe_format integer_or_not, /* must be integer or non-integer */
                                          const struct si_format_options *options)
{
   /* Depth/stencil formats can only select Z/S using the blit mask, not via the view format. */
   if (res_format != PIPE_FORMAT_NONE && util_format_is_depth_or_stencil(res_format))
      return res_format;

   /* Keep generating formats until we get a supported one. */
   while (1) {
      /* Skip one format: PIPE_FORMAT_NONE */
      enum pipe_format format = (rand() % (PIPE_FORMAT_COUNT - 1)) + 1;
      const struct util_format_description *desc = util_format_description(format);

      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV ||
          format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)
         continue;

      if (!options->allow_srgb && desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
         continue;

      if (!options->allow_subsampled && desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
         continue;

      if (!options->allow_compressed && util_format_get_blockwidth(format) >= 4)
         continue;

      /* Require the same color-vs-depth/stencil class as color_or_zs. */
      if (color_or_zs != PIPE_FORMAT_NONE &&
          (util_format_is_depth_or_stencil(color_or_zs) !=
           util_format_is_depth_or_stencil(format)))
         continue;

      if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
            /* Don't select stencil-only formats - we don't support them for rendering. */
            if (util_format_has_stencil(desc) && !util_format_has_depth(desc))
               continue;
         }

         if (!options->allow_x_channels) {
            unsigned i;

            /* Don't test formats with X channels because cpu_texture doesn't emulate them. */
            for (i = 0; i < desc->nr_channels; i++) {
               if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID)
                  break;
            }
            if (i != desc->nr_channels)
               continue;
         }
      }

      if (res_format != PIPE_FORMAT_NONE) {
         /* If the resource format is Z/S, we handle it at the beginning of this function,
          * so here res_format can only be a color format.
          */
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
            continue;

         /* The view format must have the same block size and dimensions. */
         if (util_format_get_blocksize(res_format) != util_format_get_blocksize(format) ||
             util_format_get_blockwidth(res_format) != util_format_get_blockwidth(format) ||
             util_format_get_blockheight(res_format) != util_format_get_blockheight(format))
            continue;
      }

      if (integer_or_not != PIPE_FORMAT_NONE) {
         /* The integer property must match between blit src/dst. */
         if (util_format_is_pure_integer(integer_or_not) != util_format_is_pure_integer(format))
            continue;
      }

      if (options->only_resolve &&
          (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS || util_format_is_pure_integer(format)))
         continue;

      if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
         /* Every integer format should have an equivalent non-integer format, but 128-bit integer
          * formats don't have that if floats are disallowed, which can cause an infinite loop later
          * if compat_type is non-integer.
          */
         if (!options->allow_float &&
             (util_format_is_float(format) || util_format_get_blocksizebits(format) == 128))
            continue;

         if (!options->allow_unorm16 &&
             desc->channel[0].size == 16 && desc->channel[0].normalized &&
             desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
            continue;
      }

      /* Finally, ask the driver whether the format is supported for the
       * intended usage. */
      unsigned bind = PIPE_BIND_SAMPLER_VIEW;
      if (render_target) {
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
            bind = PIPE_BIND_DEPTH_STENCIL;
         else
            bind = PIPE_BIND_RENDER_TARGET;
      }

      if (sscreen->b.is_format_supported(&sscreen->b, format, PIPE_TEXTURE_2D, 1, 1, bind))
         return format;
   }
}
308
/* Upper bound on the texture allocation size generated by
 * set_random_image_attrs(). */
#define MAX_ALLOC_SIZE (64 * 1024 * 1024)

/* Randomize the target, dimensions, sample count, and mip count of "templ".
 * templ->format must already be set. The total size is shrunk until it fits
 * under MAX_ALLOC_SIZE.
 */
static void set_random_image_attrs(struct pipe_resource *templ, bool allow_msaa,
                                   bool only_cb_resolve)
{
   unsigned target_index;

   if (only_cb_resolve) {
      target_index = 6; /* CB resolving doesn't support array textures. */
   } else {
      /* Indices 6-7 are the MSAA targets below. */
      target_index = rand() % (allow_msaa ? 8 : 6);
   }

   switch (target_index) {
   case 0:
      templ->target = PIPE_TEXTURE_1D;
      break;
   case 1:
      templ->target = PIPE_TEXTURE_2D;
      break;
   case 2:
      if (util_format_is_depth_or_stencil(templ->format))
         templ->target = PIPE_TEXTURE_2D_ARRAY; /* 3D doesn't support Z/S */
      else
         templ->target = PIPE_TEXTURE_3D;
      break;
   case 3:
      templ->target = PIPE_TEXTURE_RECT;
      break;
   case 4:
      templ->target = PIPE_TEXTURE_1D_ARRAY;
      break;
   case 5:
      templ->target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case 6:
      /* MSAA 2D: 2, 4, or 8 samples. */
      templ->target = PIPE_TEXTURE_2D;
      templ->nr_samples = 2 << (rand() % 3);
      break;
   case 7:
      templ->target = PIPE_TEXTURE_2D_ARRAY;
      templ->nr_samples = 2 << (rand() % 3);
      break;
   default:
      unreachable("invalid path");
   }

   templ->usage = PIPE_USAGE_DEFAULT;

   templ->height0 = 1;
   templ->depth0 = 1;
   templ->array_size = 1;
   templ->nr_storage_samples = templ->nr_samples;

   /* Try to hit microtiling in 1/2 of the cases. */
   unsigned max_tex_size = rand() & 1 ? 128 : 1024;

   templ->width0 = (rand() % max_tex_size) + 1;

   if (templ->target != PIPE_TEXTURE_1D &&
       templ->target != PIPE_TEXTURE_1D_ARRAY)
      templ->height0 = (rand() % max_tex_size) + 1;

   if (templ->target == PIPE_TEXTURE_3D)
      templ->depth0 = (rand() % max_tex_size) + 1;

   if (templ->target == PIPE_TEXTURE_1D_ARRAY ||
       templ->target == PIPE_TEXTURE_2D_ARRAY)
      templ->array_size = (rand() % max_tex_size) + 1;

   /* Keep reducing the size until we get a small enough size. */
   while (util_format_get_nblocks(templ->format, templ->width0, templ->height0) *
             templ->depth0 * templ->array_size * util_format_get_blocksize(templ->format) >
          MAX_ALLOC_SIZE) {
      /* Halve one randomly-chosen dimension per iteration. */
      switch (rand() % 3) {
      case 0:
         if (templ->width0 > 1)
            templ->width0 /= 2;
         break;
      case 1:
         if (templ->height0 > 1)
            templ->height0 /= 2;
         break;
      case 2:
         if (templ->depth0 > 1)
            templ->depth0 /= 2;
         else if (templ->array_size > 1)
            templ->array_size /= 2;
         break;
      }
   }

   if (util_format_get_blockwidth(templ->format) == 2)
      templ->width0 = align(templ->width0, 2);

   if (templ->target != PIPE_TEXTURE_RECT &&
       util_format_description(templ->format)->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      unsigned max_dim = MAX3(templ->width0, templ->height0, templ->depth0);

      /* MSAA textures can't have mipmaps. */
      if (templ->nr_samples <= 1)
         templ->last_level = rand() % (util_logbase2(max_dim) + 1);
   }
}
412
/* Print a one-line summary of a texture: target, size, mip/sample count, and
 * tiling mode. No trailing newline is printed.
 */
static void print_image_attrs(struct si_screen *sscreen, struct si_texture *tex)
{
   const char *mode;

   if (sscreen->info.gfx_level >= GFX9) {
      /* Unlisted swizzle modes map to NULL and print as "UNKNOWN". */
      static const char *modes[32] = {
         [ADDR_SW_LINEAR] = "LINEAR",
         [ADDR_SW_4KB_S_X] = "4KB_S_X",
         [ADDR_SW_4KB_D_X] = "4KB_D_X",
         [ADDR_SW_64KB_Z_X] = "64KB_Z_X",
         [ADDR_SW_64KB_S_X] = "64KB_S_X",
         [ADDR_SW_64KB_D_X] = "64KB_D_X",
         [ADDR_SW_64KB_R_X] = "64KB_R_X",
      };
      mode = modes[tex->surface.u.gfx9.swizzle_mode];
   } else {
      static const char *modes[32] = {
         [RADEON_SURF_MODE_LINEAR_ALIGNED] = "LINEAR",
         [RADEON_SURF_MODE_1D] = "1D_TILED",
         [RADEON_SURF_MODE_2D] = "2D_TILED",
      };
      mode = modes[tex->surface.u.legacy.level[0].mode];
   }

   if (!mode)
      mode = "UNKNOWN";

   static const char *targets[PIPE_MAX_TEXTURE_TYPES] = {
      [PIPE_TEXTURE_1D] = "1D",
      [PIPE_TEXTURE_2D] = "2D",
      [PIPE_TEXTURE_3D] = "3D",
      [PIPE_TEXTURE_RECT] = "RECT",
      [PIPE_TEXTURE_1D_ARRAY] = "1D_ARRAY",
      [PIPE_TEXTURE_2D_ARRAY] = "2D_ARRAY",
   };

   /* Format the size as WxHxD depending on dimensionality. */
   char size[64];
   if (tex->buffer.b.b.target == PIPE_TEXTURE_1D)
      snprintf(size, sizeof(size), "%u", tex->buffer.b.b.width0);
   else if (tex->buffer.b.b.target == PIPE_TEXTURE_2D ||
            tex->buffer.b.b.target == PIPE_TEXTURE_RECT)
      snprintf(size, sizeof(size), "%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0);
   else
      snprintf(size, sizeof(size), "%ux%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0,
               util_num_layers(&tex->buffer.b.b, 0));

   printf("%8s, %14s, %2u %7s, %8s", targets[tex->buffer.b.b.target], size,
          tex->buffer.b.b.nr_samples > 1 ? tex->buffer.b.b.nr_samples : tex->buffer.b.b.last_level + 1,
          tex->buffer.b.b.nr_samples > 1 ? "samples" : "levels", mode);
}
463
/* Randomized test of si_resource_copy_region: each iteration picks a random
 * format and random src/dst texture attributes, fills the source with random
 * pixels (mirrored into CPU shadow copies), performs many random sub-box GPU
 * copies, applies the same copies on the CPU with util_copy_box, and compares
 * the GPU result against the CPU emulation. Calls exit(0) when done.
 */
void si_test_image_copy_region(struct si_screen *sscreen)
{
   struct pipe_screen *screen = &sscreen->b;
   struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned i, iterations, num_partial_copies;
   unsigned num_pass = 0, num_fail = 0;

   /* the seed for random test parameters */
   srand(0x9b47d95b);
   /* the seed for random pixel data */
   s_rand_xorshift128plus(seed_xorshift128plus, false);

   iterations = 1000000000; /* just kill it when you are bored */
   num_partial_copies = 30;

   /* These parameters are randomly generated per test:
    * - which texture dimensions to use
    * - random initial pixels in src
    * - execute multiple subrectangle copies for partial blits
    */
   for (i = 0; i < iterations; i++) {
      struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
      struct si_texture *sdst;
      struct si_texture *ssrc;
      struct cpu_texture src_cpu[RADEON_SURF_MAX_LEVELS], dst_cpu[RADEON_SURF_MAX_LEVELS];
      unsigned max_width, max_height, max_depth, j;
      unsigned gfx_blits = 0, cs_blits = 0;
      bool pass;

      /* generate a random test case */
      struct si_format_options format_options = {
         .only_resolve = false,
         .allow_float = true,
         .allow_unorm16 = true,
         .allow_x_channels = false, /* cpu_texture doesn't implement X channels */
         .allow_subsampled = false, /* TODO: fix subsampled formats */
         .allow_compressed = false, /* TODO: fix compressed formats */
      };

      /* src and dst share the same format so the copy is a straight memcpy. */
      tsrc.format = tdst.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);

      /* MSAA copy testing not implemented and might be too difficult because of how
       * cpu_texture works.
       */
      set_random_image_attrs(&tsrc, false, false);
      set_random_image_attrs(&tdst, false, false);

      /* Allocate textures (both the GPU and CPU copies).
       * The CPU will emulate what the GPU should be doing.
       */
      src = screen->resource_create(screen, &tsrc);
      dst = screen->resource_create(screen, &tdst);
      assert(src);
      assert(dst);
      sdst = (struct si_texture *)dst;
      ssrc = (struct si_texture *)src;

      printf("%4u: dst = (", i);
      print_image_attrs(sscreen, sdst);
      printf("), src = (");
      print_image_attrs(sscreen, ssrc);
      printf("), format = %20s, ", util_format_description(tsrc.format)->short_name);
      fflush(stdout);

      /* Randomize the source and zero the CPU copy of the destination. */
      for (unsigned level = 0; level <= tsrc.last_level; level++) {
         alloc_cpu_texture(&src_cpu[level], &tsrc, level);
         set_random_pixels(ctx, src, &src_cpu[level], level);
      }
      for (unsigned level = 0; level <= tdst.last_level; level++) {
         alloc_cpu_texture(&dst_cpu[level], &tdst, level);
         memset(dst_cpu[level].ptr, 0, dst_cpu[level].layer_stride * util_num_layers(&tdst, level));
      }

      /* clear dst pixels */
      uint32_t zero = 0;
      si_barrier_before_simple_buffer_op(sctx, 0, dst, NULL);
      si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4,
                      SI_AUTO_SELECT_CLEAR_METHOD, false);
      si_barrier_after_simple_buffer_op(sctx, 0, dst, NULL);

      for (j = 0; j < num_partial_copies; j++) {
         int width, height, depth;
         int srcx, srcy, srcz, dstx, dsty, dstz;
         struct pipe_box box;
         unsigned old_num_draw_calls = sctx->num_draw_calls;
         unsigned old_num_cs_calls = sctx->num_compute_calls;

         /* Cycle through mip levels across the partial copies. */
         unsigned src_level = j % (tsrc.last_level + 1);
         unsigned dst_level = j % (tdst.last_level + 1);

         max_width = MIN2(u_minify(tsrc.width0, src_level), u_minify(tdst.width0, dst_level));
         max_height = MIN2(u_minify(tsrc.height0, src_level), u_minify(tdst.height0, dst_level));
         max_depth = MIN2(util_num_layers(&tsrc, src_level), util_num_layers(&tdst, dst_level));

         /* random sub-rectangle copies from src to dst */
         depth = (rand() % max_depth) + 1;
         srcz = rand() % (util_num_layers(&tsrc, src_level) - depth + 1);
         dstz = rand() % (util_num_layers(&tdst, dst_level) - depth + 1);

         /* just make sure that it doesn't divide by zero */
         assert(max_width > 0 && max_height > 0);

         width = (rand() % max_width) + 1;
         height = (rand() % max_height) + 1;

         srcx = rand() % (u_minify(tsrc.width0, src_level) - width + 1);
         srcy = rand() % (u_minify(tsrc.height0, src_level) - height + 1);

         dstx = rand() % (u_minify(tdst.width0, dst_level) - width + 1);
         dsty = rand() % (u_minify(tdst.height0, dst_level) - height + 1);

         /* Align the box to the format block size. */
         srcx &= ~(util_format_get_blockwidth(src->format) - 1);
         srcy &= ~(util_format_get_blockheight(src->format) - 1);

         dstx &= ~(util_format_get_blockwidth(dst->format) - 1);
         dsty &= ~(util_format_get_blockheight(dst->format) - 1);

         width = align(width, util_format_get_blockwidth(src->format));
         height = align(height, util_format_get_blockheight(src->format));

         /* GPU copy */
         u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
         si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, &box);

         /* See which engine was used. */
         gfx_blits += sctx->num_draw_calls > old_num_draw_calls;
         cs_blits += sctx->num_compute_calls > old_num_cs_calls;

         /* CPU copy */
         util_copy_box(dst_cpu[dst_level].ptr, tdst.format, dst_cpu[dst_level].stride,
                       dst_cpu[dst_level].layer_stride, dstx, dsty, dstz,
                       width, height, depth, src_cpu[src_level].ptr, src_cpu[src_level].stride,
                       src_cpu[src_level].layer_stride, srcx, srcy, srcz);
      }

      /* All levels must match the CPU emulation for the test to pass. */
      pass = true;
      for (unsigned level = 0; level <= tdst.last_level; level++)
         pass &= compare_textures(ctx, dst, &dst_cpu[level], level);

      if (pass)
         num_pass++;
      else
         num_fail++;

      printf("BLITs: GFX = %2u, CS = %2u, %s [%u/%u]\n", gfx_blits, cs_blits,
             pass ? "pass" : "fail", num_pass, num_pass + num_fail);

      /* cleanup */
      pipe_resource_reference(&src, NULL);
      pipe_resource_reference(&dst, NULL);
      for (unsigned level = 0; level <= tsrc.last_level; level++)
         free(src_cpu[level].ptr);
      for (unsigned level = 0; level <= tdst.last_level; level++)
         free(dst_cpu[level].ptr);
   }

   ctx->destroy(ctx);
   exit(0);
}
625
/* Randomized blit test: each iteration creates two identical source textures
 * and two cleared destination textures, fills the sources with the same
 * random pixels, executes the same pipe_blit_info once with the gfx blit
 * (u_blitter) and once with the tested path (compute blit or CB resolve,
 * selected by test_flags), and compares the two destinations. Calls exit(0)
 * when done.
 */
void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
{
   struct pipe_screen *screen = &sscreen->b;
   struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned iterations;
   unsigned num_pass = 0, num_fail = 0;
   bool only_cb_resolve = test_flags == DBG(TEST_CB_RESOLVE);

   /* Feature toggles; the tested path must match u_blitter bit-exactly, so
    * anything with known precision differences stays disabled. */
   bool allow_float = false;
   bool allow_unorm16_dst = false;
   bool allow_srgb_dst = false;
   bool allow_filter = false;
   bool allow_scaled_min = false;
   bool allow_scaled_mag = false;
   bool allow_out_of_bounds_dst = false;
   bool allow_out_of_bounds_src = false;
   bool allow_scissor = false;
   bool allow_flip = false;

   /* The following tests always compare the tested operation with the gfx blit (u_blitter). */
   switch (test_flags) {
   case DBG(TEST_CB_RESOLVE):
      /* This is mostly failing because the precision of CB_RESOLVE is very different
       * from the gfx blit. FP32 and FP16 are the only formats that mostly pass.
       */
      allow_float = true;
      allow_unorm16_dst = true;
      allow_srgb_dst = true;
      break;

   case DBG(TEST_COMPUTE_BLIT):
      //allow_float = true; /* precision difference: NaNs not preserved by CB (u_blitter) */
      allow_unorm16_dst = true;
      //allow_srgb_dst = true; /* precision difference: sRGB is less precise in CB (u_blitter) */
      //allow_filter = true; /* not implemented by compute blits, lots of precision differences */
      //allow_scaled_min = true; /* not implemented by compute blits, lots of precision differences */
      //allow_scaled_mag = true; /* not implemented by compute blits, lots of precision differences */
      allow_out_of_bounds_dst = true;
      allow_out_of_bounds_src = true;
      //allow_scissor = true; /* not implemented by compute blits */
      allow_flip = true;
      break;

   default:
      assert(0);
   }

   /* the seed for random test parameters */
   srand(0x9b47d95b);
   /* the seed for random pixel data */
   s_rand_xorshift128plus(seed_xorshift128plus, false);

   iterations = 10000000; /* just kill it when you are bored */

   /* These parameters are randomly generated per test:
    * - which texture dimensions to use
    * - random initial pixels in src
    * - random pipe_blit_info
    */
   for (unsigned i = 0; i < iterations; i++) {
      struct pipe_resource tsrc = {}, tdst = {}, *gfx_src, *gfx_dst, *comp_src, *comp_dst;

      /* Generate a random test case. */
      {
         struct si_format_options format_options = {
            .only_resolve = only_cb_resolve,
            .allow_float = allow_float,
            .allow_unorm16 = true,
            .allow_srgb = true,
            .allow_x_channels = true,
            .allow_subsampled = false, /* TODO: fix subsampled formats */
            .allow_compressed = false, /* TODO: fix compressed formats */
         };

         tsrc.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);
         /* dst must match src's color-vs-Z/S class. */
         tdst.format = get_random_format(sscreen, true, tsrc.format, 0, 0, &format_options);
      }

      set_random_image_attrs(&tsrc, true, only_cb_resolve);
      set_random_image_attrs(&tdst, !only_cb_resolve, false);

      /* MSAA blits must have matching sample counts. */
      if (tsrc.nr_samples > 1 && tdst.nr_samples > 1)
         tdst.nr_samples = tdst.nr_storage_samples = tsrc.nr_samples;

      /* Allocate textures. */
      gfx_src = screen->resource_create(screen, &tsrc);
      gfx_dst = screen->resource_create(screen, &tdst);
      comp_src = screen->resource_create(screen, &tsrc);
      comp_dst = screen->resource_create(screen, &tdst);

      /* clear dst pixels */
      uint32_t zero = 0;

      /* Using 2 consecutive barriers calls results in a single merged barrier for both resources. */
      si_barrier_before_simple_buffer_op(sctx, 0, gfx_dst, NULL);
      si_barrier_before_simple_buffer_op(sctx, 0, comp_dst, NULL);
      si_clear_buffer(sctx, gfx_dst, 0, ((struct si_texture *)gfx_dst)->surface.surf_size, &zero,
                      4, SI_AUTO_SELECT_CLEAR_METHOD, false);
      si_clear_buffer(sctx, comp_dst, 0, ((struct si_texture *)comp_dst)->surface.surf_size, &zero,
                      4, SI_AUTO_SELECT_CLEAR_METHOD, false);
      si_barrier_after_simple_buffer_op(sctx, 0, gfx_dst, NULL);
      si_barrier_after_simple_buffer_op(sctx, 0, comp_dst, NULL);

      /* TODO: These two fix quite a lot of BCn cases. */
      /*si_clear_buffer(sctx, gfx_src, 0, ((struct si_texture *)gfx_src)->surface.surf_size, &zero,
                      4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
      si_clear_buffer(sctx, comp_src, 0, ((struct si_texture *)comp_src)->surface.surf_size, &zero,
                      4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);*/

      set_random_pixels_for_2_textures(ctx, gfx_src, comp_src);

      struct pipe_blit_info info;
      memset(&info, 0, sizeof(info));

      /* Pick random view formats compatible with the resource formats. */
      {
         struct si_format_options format_options = {
            .only_resolve = only_cb_resolve,
            .allow_float = allow_float,
            .allow_unorm16 = true,
            .allow_srgb = true,
            .allow_x_channels = true,
            .allow_subsampled = false, /* TODO: fix subsampled formats */
            .allow_compressed = false, /* TODO: fix compressed formats */
         };

         info.src.format = get_random_format(sscreen, false, 0, tsrc.format, 0, &format_options);
         format_options.allow_unorm16 = allow_unorm16_dst;
         format_options.allow_srgb = allow_srgb_dst;
         info.dst.format = get_random_format(sscreen, true, 0, tdst.format, info.src.format,
                                             &format_options);
      }

      printf("%4u: dst = (", i);
      print_image_attrs(sscreen, (struct si_texture *)gfx_dst);
      printf(", %20s), src = (", util_format_short_name(info.dst.format));
      print_image_attrs(sscreen, (struct si_texture *)gfx_src);
      printf(", %20s)", util_format_short_name(info.src.format));
      fflush(stdout);

      int src_width, src_height, src_depth, dst_width, dst_height, dst_depth;
      int srcx, srcy, srcz, dstx, dsty, dstz;

      unsigned src_level = rand() % (tsrc.last_level + 1);
      unsigned dst_level = rand() % (tdst.last_level + 1);

      unsigned max_src_width = u_minify(tsrc.width0, src_level);
      unsigned max_src_height = u_minify(tsrc.height0, src_level);
      unsigned max_src_depth = util_num_layers(&tsrc, src_level);

      unsigned max_dst_width = u_minify(tdst.width0, dst_level);
      unsigned max_dst_height = u_minify(tdst.height0, dst_level);
      unsigned max_dst_depth = util_num_layers(&tdst, dst_level);

      /* make sure that it doesn't divide by zero */
      assert(max_src_width && max_src_height && max_src_depth &&
             max_dst_width && max_dst_height && max_dst_depth);

      /* random sub-rectangle copies from src to dst */
      src_width = (rand() % max_src_width) + 1;
      src_height = (rand() % max_src_height) + 1;
      src_depth = (rand() % max_src_depth) + 1;

      dst_width = (rand() % max_dst_width) + 1;
      dst_height = (rand() % max_dst_height) + 1;
      dst_depth = (rand() % max_dst_depth) + 1;

      srcx = rand() % (u_minify(tsrc.width0, src_level) - src_width + 1);
      srcy = rand() % (u_minify(tsrc.height0, src_level) - src_height + 1);
      srcz = rand() % (util_num_layers(&tsrc, src_level) - src_depth + 1);

      dstx = rand() % (u_minify(tdst.width0, dst_level) - dst_width + 1);
      dsty = rand() % (u_minify(tdst.height0, dst_level) - dst_height + 1);
      dstz = rand() % (util_num_layers(&tdst, dst_level) - dst_depth + 1);

      /* Test out-of-bounds boxes. Add -dim/10 .. +dim/10 */
      if (allow_out_of_bounds_src) {
         if (max_src_width / 5 >= 2)
            srcx += rand() % (max_src_width / 5) - max_src_width / 10;
         if (max_src_height / 5 >= 2)
            srcy += rand() % (max_src_height / 5) - max_src_height / 10;
      }

      if (allow_out_of_bounds_dst) {
         if (max_dst_width / 5 >= 2)
            dstx += rand() % (max_dst_width / 5) - max_dst_width / 10;
         if (max_dst_height / 5 >= 2)
            dsty += rand() % (max_dst_height / 5) - max_dst_height / 10;
      }

      /* Align the box to the format block size. */
      srcx &= ~(util_format_get_blockwidth(tsrc.format) - 1);
      srcy &= ~(util_format_get_blockheight(tsrc.format) - 1);

      dstx &= ~(util_format_get_blockwidth(tdst.format) - 1);
      dsty &= ~(util_format_get_blockheight(tdst.format) - 1);

      src_width = align(src_width, util_format_get_blockwidth(tsrc.format));
      src_height = align(src_height, util_format_get_blockheight(tsrc.format));

      dst_width = align(dst_width, util_format_get_blockwidth(tdst.format));
      dst_height = align(dst_height, util_format_get_blockheight(tdst.format));

      /* Clamp the boxes so no minification/magnification occurs unless allowed. */
      if (!allow_scaled_min) {
         if (src_width > dst_width)
            src_width = dst_width;
         if (src_height > dst_height)
            src_height = dst_height;
         if (src_depth > dst_depth)
            src_depth = dst_depth;
      }

      if (!allow_scaled_mag) {
         if (src_width < dst_width)
            dst_width = src_width;
         if (src_height < dst_height)
            dst_height = src_height;
         if (src_depth < dst_depth)
            dst_depth = src_depth;
      }

      /* Flips: a negative box size means a mirrored blit. */
      if (allow_flip) {
         if (rand() % 2) {
            srcx += src_width;
            src_width = -src_width;
         }
         if (rand() % 2) {
            srcy += src_height;
            src_height = -src_height;
         }
      }

      info.src.level = src_level;
      info.dst.level = dst_level;

      u_box_3d(srcx, srcy, srcz, src_width, src_height, src_depth, &info.src.box);
      u_box_3d(dstx, dsty, dstz, dst_width, dst_height, dst_depth, &info.dst.box);

      if (util_format_is_depth_and_stencil(tsrc.format)) {
         /* Randomly blit depth, stencil, or both. */
         switch (rand() % 3) {
         case 0:
            info.mask = PIPE_MASK_ZS;
            break;
         case 1:
            info.mask = PIPE_MASK_Z;
            break;
         case 2:
            info.mask = PIPE_MASK_S;
            break;
         }
      } else {
         /* RGBA, Z, or S */
         info.mask = util_format_get_mask(tdst.format);
      }

      /* Don't filter MSAA and integer sources. */
      if (allow_filter && tsrc.nr_samples <= 1 &&
          !util_format_is_pure_integer(info.src.format) && rand() % 2)
         info.filter = PIPE_TEX_FILTER_LINEAR;
      else
         info.filter = PIPE_TEX_FILTER_NEAREST;

      info.scissor_enable = allow_scissor ? rand() % 2 : false;

      if (info.scissor_enable) {
         /* Start from the dst box (whichever way it's oriented), then shrink
          * each side randomly by up to half the box dimension. */
         info.scissor.minx = MAX2(MIN2(info.dst.box.x, info.dst.box.x + info.dst.box.width), 0);
         info.scissor.miny = MAX2(MIN2(info.dst.box.y, info.dst.box.y + info.dst.box.height), 0);
         info.scissor.maxx = MIN2(MAX2(info.dst.box.x, info.dst.box.x + info.dst.box.width), UINT16_MAX);
         info.scissor.maxy = MIN2(MAX2(info.dst.box.y, info.dst.box.y + info.dst.box.height), UINT16_MAX);

         if (abs(info.dst.box.width) / 2 >= 2) {
            info.scissor.minx += rand() % (abs(info.dst.box.width) / 2);
            info.scissor.maxx -= rand() % (abs(info.dst.box.width) / 2);
         }
         if (abs(info.dst.box.height) / 2 >= 2) {
            info.scissor.miny += rand() % (abs(info.dst.box.height) / 2);
            info.scissor.maxy -= rand() % (abs(info.dst.box.height) / 2);
         }
      }

      char dstbox_s[128], srcbox_s[128], scissor[128];

      snprintf(dstbox_s, sizeof(dstbox_s), "{%ix%ix%i .. %ix%ix%i}",
               info.dst.box.x, info.dst.box.y, info.dst.box.z,
               info.dst.box.width, info.dst.box.height, info.dst.box.depth);
      snprintf(srcbox_s, sizeof(srcbox_s), "{%ix%ix%i .. %ix%ix%i}",
               info.src.box.x, info.src.box.y, info.src.box.z,
               info.src.box.width, info.src.box.height, info.src.box.depth);
      if (info.scissor_enable) {
         snprintf(scissor, sizeof(scissor), "(%u..%u, %u..%u)",
                  info.scissor.minx, info.scissor.maxx, info.scissor.miny, info.scissor.maxy);
      } else {
         snprintf(scissor, sizeof(scissor), "(none)");
      }

      printf(", filter %u, mask 0x%02x, ", info.filter, info.mask);
      printf("dst(level %u, box = %-28s), ", info.dst.level, dstbox_s);
      printf("src(level %u, box = %-28s), ", info.src.level, srcbox_s);
      printf("scissor%-20s", scissor);

      /* Blits: reference via the gfx path, then the tested path. */
      info.src.resource = gfx_src;
      info.dst.resource = gfx_dst;
      si_gfx_blit(ctx, &info);

      info.src.resource = comp_src;
      info.dst.resource = comp_dst;

      bool success;
      if (only_cb_resolve)
         success = si_msaa_resolve_blit_via_CB(ctx, &info, false);
      else
         success = si_compute_blit(sctx, &info, NULL, 0, 0, false);

      if (success) {
         printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
      } else {
         /* Tested path rejected the blit; fall back to gfx so the
          * comparison still holds. */
         si_gfx_blit(ctx, &info);
         printf(" %-7s", "gfx");
      }

      bool pass = compare_gpu_textures(ctx, gfx_dst, comp_dst);
      if (pass)
         num_pass++;
      else
         num_fail++;

      printf(" %s [%u/%u]\n", pass ? "pass" : "fail", num_pass, num_pass + num_fail);

      /* cleanup */
      pipe_resource_reference(&gfx_src, NULL);
      pipe_resource_reference(&gfx_dst, NULL);
      pipe_resource_reference(&comp_src, NULL);
      pipe_resource_reference(&comp_dst, NULL);
   }

   ctx->destroy(ctx);
   exit(0);
}
967