xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/radeonsi/si_test_image_copy_region.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 /* This file implements randomized texture blit tests. */
8 
9 #include "si_pipe.h"
10 #include "util/rand_xor.h"
11 #include "util/u_surface.h"
12 #include "amd/addrlib/inc/addrtypes.h"
13 
14 static uint64_t seed_xorshift128plus[2];
15 
16 #define RAND_NUM_SIZE 8
17 
/* CPU-side shadow of one mip level of a GPU texture.
 *
 * The GPU blits are emulated on the CPU using these CPU textures, and the
 * result of the emulation is compared with what the GPU produced.
 */
struct cpu_texture {
   /* Heap storage holding every layer of the level (see alloc_cpu_texture). */
   uint8_t *ptr;
   /* Size of the allocation behind ptr, in bytes. */
   uint64_t size;
   /* Distance in bytes between the start of two consecutive layers. */
   uint64_t layer_stride;
   /* Distance in bytes between two consecutive rows of blocks. */
   unsigned stride;
};
26 
alloc_cpu_texture(struct cpu_texture * tex,struct pipe_resource * templ,unsigned level)27 static void alloc_cpu_texture(struct cpu_texture *tex, struct pipe_resource *templ, unsigned level)
28 {
29    unsigned width = u_minify(templ->width0, level);
30    unsigned height = u_minify(templ->height0, level);
31 
32    tex->stride = align(util_format_get_stride(templ->format, width), RAND_NUM_SIZE);
33    tex->layer_stride = util_format_get_2d_size(templ->format, tex->stride, height);
34    tex->size = tex->layer_stride * util_num_layers(templ, level);
35    tex->ptr = malloc(tex->size);
36    assert(tex->ptr);
37 }
38 
set_random_pixels(struct pipe_context * ctx,struct pipe_resource * tex,struct cpu_texture * cpu,unsigned level)39 static void set_random_pixels(struct pipe_context *ctx, struct pipe_resource *tex,
40                               struct cpu_texture *cpu, unsigned level)
41 {
42    struct pipe_transfer *t;
43    uint8_t *map;
44    int x, y, z;
45    unsigned width = u_minify(tex->width0, level);
46    unsigned height = u_minify(tex->height0, level);
47    unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
48    unsigned num_layers = util_num_layers(tex, level);
49 
50    map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_WRITE, 0, 0, 0, width, height,
51                              num_layers, &t);
52    assert(map);
53 
54    for (z = 0; z < num_layers; z++) {
55       for (y = 0; y < num_y_blocks; y++) {
56          uint64_t *ptr = (uint64_t *)(map + t->layer_stride * z + t->stride * y);
57          uint64_t *ptr_cpu = (uint64_t *)(cpu->ptr + cpu->layer_stride * z + cpu->stride * y);
58          unsigned size = cpu->stride / RAND_NUM_SIZE;
59 
60          assert(t->stride % RAND_NUM_SIZE == 0);
61          assert(cpu->stride % RAND_NUM_SIZE == 0);
62 
63          for (x = 0; x < size; x++) {
64             *ptr++ = *ptr_cpu++ = rand_xorshift128plus(seed_xorshift128plus);
65          }
66       }
67    }
68 
69    pipe_texture_unmap(ctx, t);
70 }
71 
set_random_pixels_for_2_textures(struct pipe_context * ctx,struct pipe_resource * tex1,struct pipe_resource * tex2)72 static void set_random_pixels_for_2_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
73                                              struct pipe_resource *tex2)
74 {
75    /* tex1 and tex2 are assumed to be the same size, format, and layout */
76    for (unsigned level = 0; level <= tex1->last_level; level++) {
77       for (unsigned sample = 0; sample < MAX2(tex1->nr_samples, 1); sample++) {
78          struct pipe_transfer *t1, *t2;
79          uint8_t *map1, *map2;
80          int x, y, z;
81          unsigned width = align(u_minify(tex1->width0, level), util_format_get_blockwidth(tex1->format));
82          unsigned height = align(u_minify(tex1->height0, level), util_format_get_blockheight(tex1->format));
83          unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
84          unsigned num_layers = util_num_layers(tex1, level);
85          /* If we set level to sample + 1, we will only upload that sample instead of
86           * overwriting all samples.
87           */
88          unsigned level_or_sample = tex1->nr_samples > 1 ? sample + 1 : level;
89 
90          map1 = pipe_texture_map_3d(ctx, tex1, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
91                                     num_layers, &t1);
92          map2 = pipe_texture_map_3d(ctx, tex2, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
93                                     num_layers, &t2);
94          assert(map1 && map2);
95          assert(t1->stride == t2->stride);
96 
97          for (z = 0; z < num_layers; z++) {
98             for (y = 0; y < num_y_blocks; y++) {
99                uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
100                uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
101                unsigned size = t1->stride / 8;
102 
103                assert(t1->stride % 8 == 0);
104                assert(t2->stride % 8 == 0);
105 
106                for (x = 0; x < size; x++) {
107                   *ptr1++ = *ptr2++ = rand_xorshift128plus(seed_xorshift128plus);
108                }
109             }
110          }
111 
112          pipe_texture_unmap(ctx, t1);
113          pipe_texture_unmap(ctx, t2);
114       }
115    }
116 }
117 
compare_textures(struct pipe_context * ctx,struct pipe_resource * tex,struct cpu_texture * cpu,unsigned level)118 static bool compare_textures(struct pipe_context *ctx, struct pipe_resource *tex,
119                              struct cpu_texture *cpu, unsigned level)
120 {
121    struct pipe_transfer *t;
122    uint8_t *map;
123    int y, z;
124    bool pass = true;
125    unsigned width = u_minify(tex->width0, level);
126    unsigned height = u_minify(tex->height0, level);
127    unsigned stride = util_format_get_stride(tex->format, width);
128    unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
129    unsigned num_layers = util_num_layers(tex, level);
130 
131    map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_READ, 0, 0, 0, width, height,
132                              num_layers, &t);
133    assert(map);
134 
135    for (z = 0; z < num_layers; z++) {
136       for (y = 0; y < num_y_blocks; y++) {
137          uint8_t *ptr = map + t->layer_stride * z + t->stride * y;
138          uint8_t *cpu_ptr = cpu->ptr + cpu->layer_stride * z + cpu->stride * y;
139 
140          if (memcmp(ptr, cpu_ptr, stride)) {
141             pass = false;
142             goto done;
143          }
144       }
145    }
146 done:
147    pipe_texture_unmap(ctx, t);
148    return pass;
149 }
150 
compare_gpu_textures(struct pipe_context * ctx,struct pipe_resource * tex1,struct pipe_resource * tex2)151 static bool compare_gpu_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
152                                  struct pipe_resource *tex2)
153 {
154    /* tex1 and tex2 are assumed to be the same size, format, and layout */
155    for (unsigned level = 0; level <= tex1->last_level; level++) {
156       struct pipe_transfer *t1, *t2;
157       uint8_t *map1, *map2;
158       unsigned width = u_minify(tex1->width0, level);
159       unsigned height = u_minify(tex1->height0, level);
160       unsigned stride = util_format_get_stride(tex1->format, width);
161       unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
162       unsigned num_layers = util_num_layers(tex1, level);
163 
164       map1 = pipe_texture_map_3d(ctx, tex1, level, PIPE_MAP_READ, 0, 0, 0, width, height,
165                                  num_layers, &t1);
166       map2 = pipe_texture_map_3d(ctx, tex2, level, PIPE_MAP_READ, 0, 0, 0, width, height,
167                                  num_layers, &t2);
168       assert(map1 && map2);
169       assert(t1->stride == t2->stride);
170 
171       for (unsigned z = 0; z < num_layers; z++) {
172          for (unsigned y = 0; y < num_y_blocks; y++) {
173             uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
174             uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
175 
176             assert(t1->stride % 8 == 0);
177             assert(t2->stride % 8 == 0);
178 
179             if (memcmp(ptr1, ptr2, stride)) {
180                pipe_texture_unmap(ctx, t1);
181                pipe_texture_unmap(ctx, t2);
182                return false;
183             }
184          }
185       }
186 
187       pipe_texture_unmap(ctx, t1);
188       pipe_texture_unmap(ctx, t2);
189    }
190 
191    return true;
192 }
193 
/* Filters applied by get_random_format when picking a random format.
 * Each "allow_*" flag, when false, makes the picker reject that class of formats.
 */
struct si_format_options {
   /* Reject Z/S and pure-integer formats. */
   bool only_resolve;
   /* Accept float color formats (when false, 128-bit color formats are rejected too). */
   bool allow_float;
   /* Accept color formats whose first channel is 16-bit unsigned normalized. */
   bool allow_unorm16;
   /* Accept formats in the sRGB colorspace. */
   bool allow_srgb;
   /* Accept plain formats that contain a void ("X") channel. */
   bool allow_x_channels;
   /* Accept formats with the subsampled layout. */
   bool allow_subsampled;
   /* Accept formats whose block width is 4 or more. */
   bool allow_compressed;
};
203 
get_random_format(struct si_screen * sscreen,bool render_target,enum pipe_format color_or_zs,enum pipe_format res_format,enum pipe_format integer_or_not,const struct si_format_options * options)204 static enum pipe_format get_random_format(struct si_screen *sscreen, bool render_target,
205                                           enum pipe_format color_or_zs, /* must be color or Z/S */
206                                           enum pipe_format res_format,  /* must have the same bpp */
207                                           enum pipe_format integer_or_not, /* must be integer or non-integer */
208                                           const struct si_format_options *options)
209 {
210    /* Depth/stencil formats can only select Z/S using the blit mask, not via the view format. */
211    if (res_format != PIPE_FORMAT_NONE && util_format_is_depth_or_stencil(res_format))
212       return res_format;
213 
214    /* Keep generating formats until we get a supported one. */
215    while (1) {
216       /* Skip one format: PIPE_FORMAT_NONE */
217       enum pipe_format format = (rand() % (PIPE_FORMAT_COUNT - 1)) + 1;
218       const struct util_format_description *desc = util_format_description(format);
219 
220       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV ||
221           format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)
222          continue;
223 
224       if (!options->allow_srgb && desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
225          continue;
226 
227       if (!options->allow_subsampled && desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
228          continue;
229 
230       if (!options->allow_compressed && util_format_get_blockwidth(format) >= 4)
231          continue;
232 
233       if (color_or_zs != PIPE_FORMAT_NONE &&
234           (util_format_is_depth_or_stencil(color_or_zs) !=
235            util_format_is_depth_or_stencil(format)))
236          continue;
237 
238       if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
239          if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
240             /* Don't select stencil-only formats - we don't support them for rendering. */
241             if (util_format_has_stencil(desc) && !util_format_has_depth(desc))
242                continue;
243          }
244 
245          if (!options->allow_x_channels) {
246             unsigned i;
247 
248             /* Don't test formats with X channels because cpu_texture doesn't emulate them. */
249             for (i = 0; i < desc->nr_channels; i++) {
250                if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID)
251                   break;
252             }
253             if (i != desc->nr_channels)
254                continue;
255          }
256       }
257 
258       if (res_format != PIPE_FORMAT_NONE) {
259          /* If the resource format is Z/S, we handle it at the beginning of this function,
260           * so here res_format can only be a color format.
261           */
262          if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
263             continue;
264 
265          if (util_format_get_blocksize(res_format) != util_format_get_blocksize(format) ||
266              util_format_get_blockwidth(res_format) != util_format_get_blockwidth(format) ||
267              util_format_get_blockheight(res_format) != util_format_get_blockheight(format))
268             continue;
269       }
270 
271       if (integer_or_not != PIPE_FORMAT_NONE) {
272          /* The integer property must match between blit src/dst. */
273          if (util_format_is_pure_integer(integer_or_not) != util_format_is_pure_integer(format))
274             continue;
275       }
276 
277       if (options->only_resolve &&
278           (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS || util_format_is_pure_integer(format)))
279          continue;
280 
281       if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
282          /* Every integer format should have an equivalent non-integer format, but 128-bit integer
283           * formats don't have that if floats are disallowed, which can cause an infinite loop later
284           * if compat_type is non-integer.
285           */
286          if (!options->allow_float &&
287              (util_format_is_float(format) || util_format_get_blocksizebits(format) == 128))
288             continue;
289 
290          if (!options->allow_unorm16 &&
291              desc->channel[0].size == 16 && desc->channel[0].normalized &&
292              desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
293             continue;
294       }
295 
296       unsigned bind = PIPE_BIND_SAMPLER_VIEW;
297       if (render_target) {
298          if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
299             bind = PIPE_BIND_DEPTH_STENCIL;
300          else
301             bind = PIPE_BIND_RENDER_TARGET;
302       }
303 
304       if (sscreen->b.is_format_supported(&sscreen->b, format, PIPE_TEXTURE_2D, 1, 1, bind))
305          return format;
306    }
307 }
308 
309 #define MAX_ALLOC_SIZE (64 * 1024 * 1024)
310 
set_random_image_attrs(struct pipe_resource * templ,bool allow_msaa,bool only_cb_resolve)311 static void set_random_image_attrs(struct pipe_resource *templ, bool allow_msaa,
312                                    bool only_cb_resolve)
313 {
314    unsigned target_index;
315 
316    if (only_cb_resolve) {
317       target_index = 6; /* CB resolving doesn't support array textures. */
318    } else {
319       target_index = rand() % (allow_msaa ? 8 : 6);
320    }
321 
322    switch (target_index) {
323    case 0:
324       templ->target = PIPE_TEXTURE_1D;
325       break;
326    case 1:
327       templ->target = PIPE_TEXTURE_2D;
328       break;
329    case 2:
330       if (util_format_is_depth_or_stencil(templ->format))
331          templ->target = PIPE_TEXTURE_2D_ARRAY; /* 3D doesn't support Z/S */
332       else
333          templ->target = PIPE_TEXTURE_3D;
334       break;
335    case 3:
336       templ->target = PIPE_TEXTURE_RECT;
337       break;
338    case 4:
339       templ->target = PIPE_TEXTURE_1D_ARRAY;
340       break;
341    case 5:
342       templ->target = PIPE_TEXTURE_2D_ARRAY;
343       break;
344    case 6:
345       templ->target = PIPE_TEXTURE_2D;
346       templ->nr_samples = 2 << (rand() % 3);
347       break;
348    case 7:
349       templ->target = PIPE_TEXTURE_2D_ARRAY;
350       templ->nr_samples = 2 << (rand() % 3);
351       break;
352    default:
353       unreachable("invalid path");
354    }
355 
356    templ->usage = PIPE_USAGE_DEFAULT;
357 
358    templ->height0 = 1;
359    templ->depth0 = 1;
360    templ->array_size = 1;
361    templ->nr_storage_samples = templ->nr_samples;
362 
363    /* Try to hit microtiling in 1/2 of the cases. */
364    unsigned max_tex_size = rand() & 1 ? 128 : 1024;
365 
366    templ->width0 = (rand() % max_tex_size) + 1;
367 
368    if (templ->target != PIPE_TEXTURE_1D &&
369        templ->target != PIPE_TEXTURE_1D_ARRAY)
370       templ->height0 = (rand() % max_tex_size) + 1;
371 
372    if (templ->target == PIPE_TEXTURE_3D)
373       templ->depth0 = (rand() % max_tex_size) + 1;
374 
375    if (templ->target == PIPE_TEXTURE_1D_ARRAY ||
376        templ->target == PIPE_TEXTURE_2D_ARRAY)
377       templ->array_size = (rand() % max_tex_size) + 1;
378 
379    /* Keep reducing the size until it we get a small enough size. */
380    while (util_format_get_nblocks(templ->format, templ->width0, templ->height0) *
381           templ->depth0 * templ->array_size * util_format_get_blocksize(templ->format) >
382           MAX_ALLOC_SIZE) {
383       switch (rand() % 3) {
384       case 0:
385          if (templ->width0 > 1)
386             templ->width0 /= 2;
387          break;
388       case 1:
389          if (templ->height0 > 1)
390             templ->height0 /= 2;
391          break;
392       case 2:
393          if (templ->depth0 > 1)
394             templ->depth0 /= 2;
395          else if (templ->array_size > 1)
396             templ->array_size /= 2;
397          break;
398       }
399    }
400 
401    if (util_format_get_blockwidth(templ->format) == 2)
402       templ->width0 = align(templ->width0, 2);
403 
404    if (templ->target != PIPE_TEXTURE_RECT &&
405        util_format_description(templ->format)->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
406       unsigned max_dim = MAX3(templ->width0, templ->height0, templ->depth0);
407 
408       if (templ->nr_samples <= 1)
409          templ->last_level = rand() % (util_logbase2(max_dim) + 1);
410    }
411 }
412 
print_image_attrs(struct si_screen * sscreen,struct si_texture * tex)413 static void print_image_attrs(struct si_screen *sscreen, struct si_texture *tex)
414 {
415    const char *mode;
416 
417    if (sscreen->info.gfx_level >= GFX9) {
418       static const char *modes[32] = {
419          [ADDR_SW_LINEAR] = "LINEAR",
420          [ADDR_SW_4KB_S_X] = "4KB_S_X",
421          [ADDR_SW_4KB_D_X] = "4KB_D_X",
422          [ADDR_SW_64KB_Z_X] = "64KB_Z_X",
423          [ADDR_SW_64KB_S_X] = "64KB_S_X",
424          [ADDR_SW_64KB_D_X] = "64KB_D_X",
425          [ADDR_SW_64KB_R_X] = "64KB_R_X",
426       };
427       mode = modes[tex->surface.u.gfx9.swizzle_mode];
428    } else {
429       static const char *modes[32] = {
430          [RADEON_SURF_MODE_LINEAR_ALIGNED] = "LINEAR",
431          [RADEON_SURF_MODE_1D] = "1D_TILED",
432          [RADEON_SURF_MODE_2D] = "2D_TILED",
433       };
434       mode = modes[tex->surface.u.legacy.level[0].mode];
435    }
436 
437    if (!mode)
438       mode = "UNKNOWN";
439 
440    static const char *targets[PIPE_MAX_TEXTURE_TYPES] = {
441       [PIPE_TEXTURE_1D] = "1D",
442       [PIPE_TEXTURE_2D] = "2D",
443       [PIPE_TEXTURE_3D] = "3D",
444       [PIPE_TEXTURE_RECT] = "RECT",
445       [PIPE_TEXTURE_1D_ARRAY] = "1D_ARRAY",
446       [PIPE_TEXTURE_2D_ARRAY] = "2D_ARRAY",
447    };
448 
449    char size[64];
450    if (tex->buffer.b.b.target == PIPE_TEXTURE_1D)
451       snprintf(size, sizeof(size), "%u", tex->buffer.b.b.width0);
452    else if (tex->buffer.b.b.target == PIPE_TEXTURE_2D ||
453             tex->buffer.b.b.target == PIPE_TEXTURE_RECT)
454       snprintf(size, sizeof(size), "%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0);
455    else
456       snprintf(size, sizeof(size), "%ux%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0,
457                util_num_layers(&tex->buffer.b.b, 0));
458 
459    printf("%8s, %14s, %2u %7s, %8s", targets[tex->buffer.b.b.target], size,
460           tex->buffer.b.b.nr_samples > 1 ? tex->buffer.b.b.nr_samples : tex->buffer.b.b.last_level + 1,
461           tex->buffer.b.b.nr_samples > 1 ? "samples" : "levels", mode);
462 }
463 
si_test_image_copy_region(struct si_screen * sscreen)464 void si_test_image_copy_region(struct si_screen *sscreen)
465 {
466    struct pipe_screen *screen = &sscreen->b;
467    struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
468    struct si_context *sctx = (struct si_context *)ctx;
469    unsigned i, iterations, num_partial_copies;
470    unsigned num_pass = 0, num_fail = 0;
471 
472    /* the seed for random test parameters */
473    srand(0x9b47d95b);
474    /* the seed for random pixel data */
475    s_rand_xorshift128plus(seed_xorshift128plus, false);
476 
477    iterations = 1000000000; /* just kill it when you are bored */
478    num_partial_copies = 30;
479 
480    /* These parameters are randomly generated per test:
481     * - which texture dimensions to use
482     * - random initial pixels in src
483     * - execute multiple subrectangle copies for partial blits
484     */
485    for (i = 0; i < iterations; i++) {
486       struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
487       struct si_texture *sdst;
488       struct si_texture *ssrc;
489       struct cpu_texture src_cpu[RADEON_SURF_MAX_LEVELS], dst_cpu[RADEON_SURF_MAX_LEVELS];
490       unsigned max_width, max_height, max_depth, j;
491       unsigned gfx_blits = 0, cs_blits = 0;
492       bool pass;
493 
494       /* generate a random test case */
495       struct si_format_options format_options = {
496          .only_resolve = false,
497          .allow_float = true,
498          .allow_unorm16 = true,
499          .allow_x_channels = false, /* cpu_texture doesn't implement X channels */
500          .allow_subsampled = false, /* TODO: fix subsampled formats */
501          .allow_compressed = false, /* TODO: fix compressed formats */
502       };
503 
504       tsrc.format = tdst.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);
505 
506       /* MSAA copy testing not implemented and might be too difficult because of how
507        * cpu_texture works.
508        */
509       set_random_image_attrs(&tsrc, false, false);
510       set_random_image_attrs(&tdst, false, false);
511 
512       /* Allocate textures (both the GPU and CPU copies).
513        * The CPU will emulate what the GPU should be doing.
514        */
515       src = screen->resource_create(screen, &tsrc);
516       dst = screen->resource_create(screen, &tdst);
517       assert(src);
518       assert(dst);
519       sdst = (struct si_texture *)dst;
520       ssrc = (struct si_texture *)src;
521 
522       printf("%4u: dst = (", i);
523       print_image_attrs(sscreen, sdst);
524       printf("), src = (");
525       print_image_attrs(sscreen, ssrc);
526       printf("), format = %20s, ", util_format_description(tsrc.format)->short_name);
527       fflush(stdout);
528 
529       for (unsigned level = 0; level <= tsrc.last_level; level++) {
530          alloc_cpu_texture(&src_cpu[level], &tsrc, level);
531          set_random_pixels(ctx, src, &src_cpu[level], level);
532       }
533       for (unsigned level = 0; level <= tdst.last_level; level++) {
534          alloc_cpu_texture(&dst_cpu[level], &tdst, level);
535          memset(dst_cpu[level].ptr, 0, dst_cpu[level].layer_stride * util_num_layers(&tdst, level));
536       }
537 
538       /* clear dst pixels */
539       uint32_t zero = 0;
540       si_barrier_before_simple_buffer_op(sctx, 0, dst, NULL);
541       si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4,
542                       SI_AUTO_SELECT_CLEAR_METHOD, false);
543       si_barrier_after_simple_buffer_op(sctx, 0, dst, NULL);
544 
545       for (j = 0; j < num_partial_copies; j++) {
546          int width, height, depth;
547          int srcx, srcy, srcz, dstx, dsty, dstz;
548          struct pipe_box box;
549          unsigned old_num_draw_calls = sctx->num_draw_calls;
550          unsigned old_num_cs_calls = sctx->num_compute_calls;
551 
552          unsigned src_level = j % (tsrc.last_level + 1);
553          unsigned dst_level = j % (tdst.last_level + 1);
554 
555          max_width = MIN2(u_minify(tsrc.width0, src_level), u_minify(tdst.width0, dst_level));
556          max_height = MIN2(u_minify(tsrc.height0, src_level), u_minify(tdst.height0, dst_level));
557          max_depth = MIN2(util_num_layers(&tsrc, src_level), util_num_layers(&tdst, dst_level));
558 
559          /* random sub-rectangle copies from src to dst */
560          depth = (rand() % max_depth) + 1;
561          srcz = rand() % (util_num_layers(&tsrc, src_level) - depth + 1);
562          dstz = rand() % (util_num_layers(&tdst, dst_level) - depth + 1);
563 
564          /* just make sure that it doesn't divide by zero */
565          assert(max_width > 0 && max_height > 0);
566 
567          width = (rand() % max_width) + 1;
568          height = (rand() % max_height) + 1;
569 
570          srcx = rand() % (u_minify(tsrc.width0, src_level) - width + 1);
571          srcy = rand() % (u_minify(tsrc.height0, src_level) - height + 1);
572 
573          dstx = rand() % (u_minify(tdst.width0, dst_level) - width + 1);
574          dsty = rand() % (u_minify(tdst.height0, dst_level) - height + 1);
575 
576          /* Align the box to the format block size. */
577          srcx &= ~(util_format_get_blockwidth(src->format) - 1);
578          srcy &= ~(util_format_get_blockheight(src->format) - 1);
579 
580          dstx &= ~(util_format_get_blockwidth(dst->format) - 1);
581          dsty &= ~(util_format_get_blockheight(dst->format) - 1);
582 
583          width = align(width, util_format_get_blockwidth(src->format));
584          height = align(height, util_format_get_blockheight(src->format));
585 
586          /* GPU copy */
587          u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
588          si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, &box);
589 
590          /* See which engine was used. */
591          gfx_blits += sctx->num_draw_calls > old_num_draw_calls;
592          cs_blits += sctx->num_compute_calls > old_num_cs_calls;
593 
594          /* CPU copy */
595          util_copy_box(dst_cpu[dst_level].ptr, tdst.format, dst_cpu[dst_level].stride,
596                        dst_cpu[dst_level].layer_stride, dstx, dsty, dstz,
597                        width, height, depth, src_cpu[src_level].ptr, src_cpu[src_level].stride,
598                        src_cpu[src_level].layer_stride, srcx, srcy, srcz);
599       }
600 
601       pass = true;
602       for (unsigned level = 0; level <= tdst.last_level; level++)
603          pass &= compare_textures(ctx, dst, &dst_cpu[level], level);
604 
605       if (pass)
606          num_pass++;
607       else
608          num_fail++;
609 
610       printf("BLITs: GFX = %2u, CS = %2u, %s [%u/%u]\n", gfx_blits, cs_blits,
611              pass ? "pass" : "fail", num_pass, num_pass + num_fail);
612 
613       /* cleanup */
614       pipe_resource_reference(&src, NULL);
615       pipe_resource_reference(&dst, NULL);
616       for (unsigned level = 0; level <= tsrc.last_level; level++)
617          free(src_cpu[level].ptr);
618       for (unsigned level = 0; level <= tdst.last_level; level++)
619          free(dst_cpu[level].ptr);
620    }
621 
622    ctx->destroy(ctx);
623    exit(0);
624 }
625 
si_test_blit(struct si_screen * sscreen,unsigned test_flags)626 void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
627 {
628    struct pipe_screen *screen = &sscreen->b;
629    struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
630    struct si_context *sctx = (struct si_context *)ctx;
631    unsigned iterations;
632    unsigned num_pass = 0, num_fail = 0;
633    bool only_cb_resolve = test_flags == DBG(TEST_CB_RESOLVE);
634 
635    bool allow_float = false;
636    bool allow_unorm16_dst = false;
637    bool allow_srgb_dst = false;
638    bool allow_filter = false;
639    bool allow_scaled_min = false;
640    bool allow_scaled_mag = false;
641    bool allow_out_of_bounds_dst = false;
642    bool allow_out_of_bounds_src = false;
643    bool allow_scissor = false;
644    bool allow_flip = false;
645 
646    /* The following tests always compare the tested operation with the gfx blit (u_blitter). */
647    switch (test_flags) {
648    case DBG(TEST_CB_RESOLVE):
649       /* This is mostly failing because the precision of CB_RESOLVE is very different
650        * from the gfx blit. FP32 and FP16 are the only formats that mostly pass.
651        */
652       allow_float = true;
653       allow_unorm16_dst = true;
654       allow_srgb_dst = true;
655       break;
656 
657    case DBG(TEST_COMPUTE_BLIT):
658       //allow_float = true;      /* precision difference: NaNs not preserved by CB (u_blitter) */
659       allow_unorm16_dst = true;
660       //allow_srgb_dst = true;   /* precision difference: sRGB is less precise in CB (u_blitter) */
661       //allow_filter = true;     /* not implemented by compute blits, lots of precision differences */
662       //allow_scaled_min = true; /* not implemented by compute blits, lots of precision differences */
663       //allow_scaled_mag = true; /* not implemented by compute blits, lots of precision differences */
664       allow_out_of_bounds_dst = true;
665       allow_out_of_bounds_src = true;
666       //allow_scissor = true;    /* not implemented by compute blits */
667       allow_flip = true;
668       break;
669 
670    default:
671       assert(0);
672    }
673 
674    /* the seed for random test parameters */
675    srand(0x9b47d95b);
676    /* the seed for random pixel data */
677    s_rand_xorshift128plus(seed_xorshift128plus, false);
678 
679    iterations = 10000000; /* just kill it when you are bored */
680 
681    /* These parameters are randomly generated per test:
682     * - which texture dimensions to use
683     * - random initial pixels in src
684     * - random pipe_blit_info
685     */
686    for (unsigned i = 0; i < iterations; i++) {
687       struct pipe_resource tsrc = {}, tdst = {}, *gfx_src, *gfx_dst, *comp_src, *comp_dst;
688 
689       /* Generate a random test case. */
690       {
691          struct si_format_options format_options = {
692             .only_resolve = only_cb_resolve,
693             .allow_float = allow_float,
694             .allow_unorm16 = true,
695             .allow_srgb = true,
696             .allow_x_channels = true,
697             .allow_subsampled = false, /* TODO: fix subsampled formats */
698             .allow_compressed = false, /* TODO: fix compressed formats */
699          };
700 
701          tsrc.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);
702          tdst.format = get_random_format(sscreen, true, tsrc.format, 0, 0, &format_options);
703       }
704 
705       set_random_image_attrs(&tsrc, true, only_cb_resolve);
706       set_random_image_attrs(&tdst, !only_cb_resolve, false);
707 
708       /* MSAA blits must have matching sample counts. */
709       if (tsrc.nr_samples > 1 && tdst.nr_samples > 1)
710          tdst.nr_samples = tdst.nr_storage_samples = tsrc.nr_samples;
711 
712       /* Allocate textures. */
713       gfx_src = screen->resource_create(screen, &tsrc);
714       gfx_dst = screen->resource_create(screen, &tdst);
715       comp_src = screen->resource_create(screen, &tsrc);
716       comp_dst = screen->resource_create(screen, &tdst);
717 
718       /* clear dst pixels */
719       uint32_t zero = 0;
720 
721       /* Using 2 consecutive barriers calls results in a single merged barrier for both resources. */
722       si_barrier_before_simple_buffer_op(sctx, 0, gfx_dst, NULL);
723       si_barrier_before_simple_buffer_op(sctx, 0, comp_dst, NULL);
724       si_clear_buffer(sctx, gfx_dst, 0, ((struct si_texture *)gfx_dst)->surface.surf_size, &zero,
725                       4, SI_AUTO_SELECT_CLEAR_METHOD, false);
726       si_clear_buffer(sctx, comp_dst, 0, ((struct si_texture *)comp_dst)->surface.surf_size, &zero,
727                       4, SI_AUTO_SELECT_CLEAR_METHOD, false);
728       si_barrier_after_simple_buffer_op(sctx, 0, gfx_dst, NULL);
729       si_barrier_after_simple_buffer_op(sctx, 0, comp_dst, NULL);
730 
731       /* TODO: These two fix quite a lot of BCn cases. */
732       /*si_clear_buffer(sctx, gfx_src, 0, ((struct si_texture *)gfx_src)->surface.surf_size, &zero,
733                       4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
734       si_clear_buffer(sctx, comp_src, 0, ((struct si_texture *)comp_src)->surface.surf_size, &zero,
735                       4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);*/
736 
737       set_random_pixels_for_2_textures(ctx, gfx_src, comp_src);
738 
739       struct pipe_blit_info info;
740       memset(&info, 0, sizeof(info));
741 
742       {
743          struct si_format_options format_options = {
744             .only_resolve = only_cb_resolve,
745             .allow_float = allow_float,
746             .allow_unorm16 = true,
747             .allow_srgb = true,
748             .allow_x_channels = true,
749             .allow_subsampled = false, /* TODO: fix subsampled formats */
750             .allow_compressed = false, /* TODO: fix compressed formats */
751          };
752 
753          info.src.format = get_random_format(sscreen, false, 0, tsrc.format, 0, &format_options);
754          format_options.allow_unorm16 = allow_unorm16_dst;
755          format_options.allow_srgb = allow_srgb_dst;
756          info.dst.format = get_random_format(sscreen, true, 0, tdst.format, info.src.format,
757                                              &format_options);
758       }
759 
760       printf("%4u: dst = (", i);
761       print_image_attrs(sscreen, (struct si_texture *)gfx_dst);
762       printf(", %20s), src = (", util_format_short_name(info.dst.format));
763       print_image_attrs(sscreen, (struct si_texture *)gfx_src);
764       printf(", %20s)", util_format_short_name(info.src.format));
765       fflush(stdout);
766 
767       int src_width, src_height, src_depth, dst_width, dst_height, dst_depth;
768       int srcx, srcy, srcz, dstx, dsty, dstz;
769 
770       unsigned src_level = rand() % (tsrc.last_level + 1);
771       unsigned dst_level = rand() % (tdst.last_level + 1);
772 
773       unsigned max_src_width = u_minify(tsrc.width0, src_level);
774       unsigned max_src_height = u_minify(tsrc.height0, src_level);
775       unsigned max_src_depth = util_num_layers(&tsrc, src_level);
776 
777       unsigned max_dst_width = u_minify(tdst.width0, dst_level);
778       unsigned max_dst_height = u_minify(tdst.height0, dst_level);
779       unsigned max_dst_depth = util_num_layers(&tdst, dst_level);
780 
781       /* make sure that it doesn't divide by zero */
782       assert(max_src_width && max_src_height && max_src_depth &&
783              max_dst_width && max_dst_height && max_dst_depth);
784 
785       /* random sub-rectangle copies from src to dst */
786       src_width = (rand() % max_src_width) + 1;
787       src_height = (rand() % max_src_height) + 1;
788       src_depth = (rand() % max_src_depth) + 1;
789 
790       dst_width = (rand() % max_dst_width) + 1;
791       dst_height = (rand() % max_dst_height) + 1;
792       dst_depth = (rand() % max_dst_depth) + 1;
793 
794       srcx = rand() % (u_minify(tsrc.width0, src_level) - src_width + 1);
795       srcy = rand() % (u_minify(tsrc.height0, src_level) - src_height + 1);
796       srcz = rand() % (util_num_layers(&tsrc, src_level) - src_depth + 1);
797 
798       dstx = rand() % (u_minify(tdst.width0, dst_level) - dst_width + 1);
799       dsty = rand() % (u_minify(tdst.height0, dst_level) - dst_height + 1);
800       dstz = rand() % (util_num_layers(&tdst, dst_level) - dst_depth + 1);
801 
802       /* Test out-of-bounds boxes. Add -dim/10 .. +dim/10 */
803       if (allow_out_of_bounds_src) {
804          if (max_src_width / 5 >= 2)
805             srcx += rand() % (max_src_width / 5) - max_src_width / 10;
806          if (max_src_height / 5 >= 2)
807             srcy += rand() % (max_src_height / 5) - max_src_height / 10;
808       }
809 
810       if (allow_out_of_bounds_dst) {
811          if (max_dst_width / 5 >= 2)
812             dstx += rand() % (max_dst_width / 5) - max_dst_width / 10;
813          if (max_dst_height / 5 >= 2)
814             dsty += rand() % (max_dst_height / 5) - max_dst_height / 10;
815       }
816 
817       /* Align the box to the format block size. */
818       srcx &= ~(util_format_get_blockwidth(tsrc.format) - 1);
819       srcy &= ~(util_format_get_blockheight(tsrc.format) - 1);
820 
821       dstx &= ~(util_format_get_blockwidth(tdst.format) - 1);
822       dsty &= ~(util_format_get_blockheight(tdst.format) - 1);
823 
824       src_width = align(src_width, util_format_get_blockwidth(tsrc.format));
825       src_height = align(src_height, util_format_get_blockheight(tsrc.format));
826 
827       dst_width = align(dst_width, util_format_get_blockwidth(tdst.format));
828       dst_height = align(dst_height, util_format_get_blockheight(tdst.format));
829 
830       if (!allow_scaled_min) {
831          if (src_width > dst_width)
832             src_width = dst_width;
833          if (src_height > dst_height)
834             src_height = dst_height;
835          if (src_depth > dst_depth)
836             src_depth = dst_depth;
837       }
838 
839       if (!allow_scaled_mag) {
840          if (src_width < dst_width)
841             dst_width = src_width;
842          if (src_height < dst_height)
843             dst_height = src_height;
844          if (src_depth < dst_depth)
845             dst_depth = src_depth;
846       }
847 
848       /* Flips */
849       if (allow_flip) {
850          if (rand() % 2) {
851             srcx += src_width;
852             src_width = -src_width;
853          }
854          if (rand() % 2) {
855             srcy += src_height;
856             src_height = -src_height;
857          }
858       }
859 
860       info.src.level = src_level;
861       info.dst.level = dst_level;
862 
863       u_box_3d(srcx, srcy, srcz, src_width, src_height, src_depth, &info.src.box);
864       u_box_3d(dstx, dsty, dstz, dst_width, dst_height, dst_depth, &info.dst.box);
865 
866       if (util_format_is_depth_and_stencil(tsrc.format)) {
867          switch (rand() % 3) {
868          case 0:
869             info.mask = PIPE_MASK_ZS;
870             break;
871          case 1:
872             info.mask = PIPE_MASK_Z;
873             break;
874          case 2:
875             info.mask = PIPE_MASK_S;
876             break;
877          }
878       } else {
879          /* RGBA, Z, or S */
880          info.mask = util_format_get_mask(tdst.format);
881       }
882 
883       /* Don't filter MSAA and integer sources. */
884       if (allow_filter && tsrc.nr_samples <= 1 &&
885           !util_format_is_pure_integer(info.src.format) && rand() % 2)
886          info.filter = PIPE_TEX_FILTER_LINEAR;
887       else
888          info.filter = PIPE_TEX_FILTER_NEAREST;
889 
890       info.scissor_enable = allow_scissor ? rand() % 2 : false;
891 
892       if (info.scissor_enable) {
893          info.scissor.minx = MAX2(MIN2(info.dst.box.x, info.dst.box.x + info.dst.box.width), 0);
894          info.scissor.miny = MAX2(MIN2(info.dst.box.y, info.dst.box.y + info.dst.box.height), 0);
895          info.scissor.maxx = MIN2(MAX2(info.dst.box.x, info.dst.box.x + info.dst.box.width), UINT16_MAX);
896          info.scissor.maxy = MIN2(MAX2(info.dst.box.y, info.dst.box.y + info.dst.box.height), UINT16_MAX);
897 
898          if (abs(info.dst.box.width) / 2 >= 2) {
899             info.scissor.minx += rand() % (abs(info.dst.box.width) / 2);
900             info.scissor.maxx -= rand() % (abs(info.dst.box.width) / 2);
901          }
902          if (abs(info.dst.box.height) / 2 >= 2) {
903             info.scissor.miny += rand() % (abs(info.dst.box.height) / 2);
904             info.scissor.maxy -= rand() % (abs(info.dst.box.height) / 2);
905          }
906       }
907 
908       char dstbox_s[128], srcbox_s[128], scissor[128];
909 
910       snprintf(dstbox_s, sizeof(dstbox_s), "{%ix%ix%i .. %ix%ix%i}",
911                info.dst.box.x, info.dst.box.y, info.dst.box.z,
912                info.dst.box.width, info.dst.box.height, info.dst.box.depth);
913       snprintf(srcbox_s, sizeof(srcbox_s), "{%ix%ix%i .. %ix%ix%i}",
914                info.src.box.x, info.src.box.y, info.src.box.z,
915                info.src.box.width, info.src.box.height, info.src.box.depth);
916       if (info.scissor_enable) {
917          snprintf(scissor, sizeof(scissor), "(%u..%u, %u..%u)",
918                   info.scissor.minx, info.scissor.maxx, info.scissor.miny, info.scissor.maxy);
919       } else {
920          snprintf(scissor, sizeof(scissor), "(none)");
921       }
922 
923       printf(", filter %u, mask 0x%02x, ", info.filter, info.mask);
924       printf("dst(level %u, box = %-28s), ", info.dst.level, dstbox_s);
925       printf("src(level %u, box = %-28s), ", info.src.level, srcbox_s);
926       printf("scissor%-20s", scissor);
927 
928       /* Blits. */
929       info.src.resource = gfx_src;
930       info.dst.resource = gfx_dst;
931       si_gfx_blit(ctx, &info);
932 
933       info.src.resource = comp_src;
934       info.dst.resource = comp_dst;
935 
936       bool success;
937       if (only_cb_resolve)
938          success = si_msaa_resolve_blit_via_CB(ctx, &info, false);
939       else
940          success = si_compute_blit(sctx, &info, NULL, 0, 0, false);
941 
942       if (success) {
943          printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
944       } else {
945          si_gfx_blit(ctx, &info);
946          printf(" %-7s", "gfx");
947       }
948 
949       bool pass = compare_gpu_textures(ctx, gfx_dst, comp_dst);
950       if (pass)
951          num_pass++;
952       else
953          num_fail++;
954 
955       printf(" %s [%u/%u]\n", pass ? "pass" : "fail", num_pass, num_pass + num_fail);
956 
957       /* cleanup */
958       pipe_resource_reference(&gfx_src, NULL);
959       pipe_resource_reference(&gfx_dst, NULL);
960       pipe_resource_reference(&comp_src, NULL);
961       pipe_resource_reference(&comp_dst, NULL);
962    }
963 
964    ctx->destroy(ctx);
965    exit(0);
966 }
967