xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_surface_meta_address_test.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 /* Make the test not meaningless when asserts are disabled. */
9 #undef NDEBUG
10 
11 #include <assert.h>
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 
16 #include <amdgpu.h>
17 #include "drm-uapi/amdgpu_drm.h"
18 #include "drm-uapi/drm_fourcc.h"
19 
20 #include "ac_surface.h"
21 #include "util/macros.h"
22 #include "util/u_atomic.h"
23 #include "util/u_math.h"
24 #include "util/u_vector.h"
25 #include "util/mesa-sha1.h"
26 #include "addrlib/inc/addrinterface.h"
27 
28 #include "ac_surface_test_common.h"
29 
30 /*
31  * The main goal of this test is to validate that our dcc/htile addressing
32  * functions match addrlib behavior.
33  */
34 
35 /* DCC address computation without mipmapping.
36  * CMASK address computation without mipmapping and without multisampling.
37  */
gfx9_meta_addr_from_coord(const struct radeon_info * info,const struct gfx9_addr_meta_equation * eq,unsigned meta_block_width,unsigned meta_block_height,unsigned meta_block_depth,unsigned meta_pitch,unsigned meta_height,unsigned x,unsigned y,unsigned z,unsigned sample,unsigned pipe_xor,unsigned * bit_position)38 static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info,
39                                           /* Shader key inputs: */
40                                           /* equation varies with resource_type, swizzle_mode,
41                                            * bpp, number of fragments, pipe_aligned, rb_aligned */
42                                           const struct gfx9_addr_meta_equation *eq,
43                                           unsigned meta_block_width, unsigned meta_block_height,
44                                           unsigned meta_block_depth,
45                                           /* Shader inputs: */
46                                           unsigned meta_pitch, unsigned meta_height,
47                                           unsigned x, unsigned y, unsigned z,
48                                           unsigned sample, unsigned pipe_xor,
49                                           /* Shader outputs (CMASK only): */
50                                           unsigned *bit_position)
51 {
52    /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
53    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
54    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
55    unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);
56 
57    unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
58    unsigned numPipeBits = eq->numPipeBits;
59    unsigned pitchInBlock = meta_pitch >> meta_block_width_log2;
60    unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock;
61 
62    unsigned xb = x >> meta_block_width_log2;
63    unsigned yb = y >> meta_block_height_log2;
64    unsigned zb = z >> meta_block_depth_log2;
65 
66    unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
67    unsigned coords[] = {x, y, z, sample, blockIndex};
68 
69    unsigned address = 0;
70    unsigned num_bits = eq->num_bits;
71    assert(num_bits <= 32);
72 
73    /* Compute the address up until the last bit that doesn't use the block index. */
74    for (unsigned b = 0; b < num_bits - 1; b++) {
75       unsigned xor = 0;
76       for (unsigned c = 0; c < 5; c++) {
77          if (eq->bit[b].coord[c].dim >= 5)
78             continue;
79 
80          assert(eq->bit[b].coord[c].ord < 32);
81          unsigned ison = (coords[eq->bit[b].coord[c].dim] >>
82                                  eq->bit[b].coord[c].ord) & 0x1;
83 
84          xor ^= ison;
85       }
86       address |= xor << b;
87    }
88 
89    /* Fill the remaining bits with the block index. */
90    unsigned last = num_bits - 1;
91    address |= (blockIndex >> eq->bit[last].coord[0].ord) << last;
92 
93    if (bit_position)
94       *bit_position = (address & 1) << 2;
95 
96    unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);
97    return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
98 }
99 
100 /* DCC/CMASK/HTILE address computation for GFX10. */
gfx10_meta_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned blkSizeLog2,unsigned meta_pitch,unsigned meta_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)101 static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,
102                                            /* Shader key inputs: */
103                                            const uint16_t *equation,
104                                            unsigned meta_block_width, unsigned meta_block_height,
105                                            unsigned blkSizeLog2,
106                                            /* Shader inputs: */
107                                            unsigned meta_pitch, unsigned meta_slice_size,
108                                            unsigned x, unsigned y, unsigned z,
109                                            unsigned pipe_xor,
110                                            /* Shader outputs: (CMASK only) */
111                                            unsigned *bit_position)
112 {
113    /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
114    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
115    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
116 
117    unsigned coord[] = {x, y, z, 0};
118    unsigned address = 0;
119 
120    for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {
121       unsigned v = 0;
122 
123       for (unsigned c = 0; c < 4; c++) {
124          if (equation[i*4+c] != 0) {
125             unsigned mask = equation[i*4+c];
126             unsigned bits = coord[c];
127 
128             while (mask)
129                v ^= (bits >> u_bit_scan(&mask)) & 0x1;
130          }
131       }
132 
133       address |= v << i;
134    }
135 
136    unsigned blkMask = (1 << blkSizeLog2) - 1;
137    unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
138    unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
139    unsigned xb = x >> meta_block_width_log2;
140    unsigned yb = y >> meta_block_height_log2;
141    unsigned pb = meta_pitch >> meta_block_width_log2;
142    unsigned blkIndex = (yb * pb) + xb;
143    unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
144 
145    if (bit_position)
146       *bit_position = (address & 1) << 2;
147 
148    return (meta_slice_size * z) +
149           (blkIndex * (1 << blkSizeLog2)) +
150           ((address >> 1) ^ pipeXor);
151 }
152 
153 /* DCC address computation without mipmapping and MSAA. */
gfx10_dcc_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned dcc_pitch,unsigned dcc_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)154 static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,
155                                           /* Shader key inputs: */
156                                           /* equation varies with bpp and pipe_aligned */
157                                           const uint16_t *equation, unsigned bpp,
158                                           unsigned meta_block_width, unsigned meta_block_height,
159                                           /* Shader inputs: */
160                                           unsigned dcc_pitch, unsigned dcc_slice_size,
161                                           unsigned x, unsigned y, unsigned z,
162                                           unsigned pipe_xor)
163 {
164    unsigned bpp_log2 = util_logbase2(bpp >> 3);
165    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
166    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
167    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8;
168 
169    return gfx10_meta_addr_from_coord(info, equation,
170                                      meta_block_width, meta_block_height,
171                                      blkSizeLog2,
172                                      dcc_pitch, dcc_slice_size,
173                                      x, y, z, pipe_xor, NULL);
174 }
175 
one_dcc_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned samples,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z,unsigned start_sample)176 static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
177                                  const struct radeon_info *info, unsigned width, unsigned height,
178                                  unsigned depth, unsigned samples, unsigned bpp,
179                                  unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,
180                                  unsigned mrt_index,
181                                  unsigned start_x, unsigned start_y, unsigned start_z,
182                                  unsigned start_sample)
183 {
184    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};
185    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};
186    ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};
187    ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};
188    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};
189    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
190    ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
191 
192    dout.pMipInfo = meta_mip_info;
193 
194    /* Compute DCC info. */
195    in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;
196    in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;
197    xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;
198    xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;
199    in.bpp = din.bpp = bpp;
200    xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;
201    in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */
202    din.unalignedWidth = width;
203    din.unalignedHeight = height;
204    din.numSlices = depth;
205    din.firstMipIdInTail = 1;
206 
207    int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
208    assert(ret == ADDR_OK);
209 
210    /* Compute xor. */
211    static AddrFormat format[] = {
212       ADDR_FMT_8,
213       ADDR_FMT_16,
214       ADDR_FMT_32,
215       ADDR_FMT_32_32,
216       ADDR_FMT_32_32_32_32,
217    };
218    xin.flags.color = 1;
219    xin.flags.texture = 1;
220    xin.flags.opt4space = 1;
221    xin.flags.metaRbUnaligned = !rb_aligned;
222    xin.flags.metaPipeUnaligned = !pipe_aligned;
223    xin.format = format[util_logbase2(bpp / 8)];
224    xin.surfIndex = mrt_index;
225 
226    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
227    assert(ret == ADDR_OK);
228 
229    /* Compute addresses */
230    in.compressBlkWidth = dout.compressBlkWidth;
231    in.compressBlkHeight = dout.compressBlkHeight;
232    in.compressBlkDepth = dout.compressBlkDepth;
233    in.metaBlkWidth = dout.metaBlkWidth;
234    in.metaBlkHeight = dout.metaBlkHeight;
235    in.metaBlkDepth = dout.metaBlkDepth;
236    in.dccRamSliceSize = dout.dccRamSliceSize;
237 
238    in.mipId = 0;
239    in.pitch = dout.pitch;
240    in.height = dout.height;
241    in.pipeXor = xout.pipeBankXor;
242 
243    /* Validate that the packed gfx9_meta_equation structure can fit all fields. */
244    const struct gfx9_meta_equation eq;
245    if (info->gfx_level == GFX9) {
246       /* The bit array is smaller in gfx9_meta_equation than in addrlib. */
247       assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
248    } else {
249       /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
250       for (unsigned i = 0; i < 4; i++)
251          assert(dout.equation.gfx10_bits[i] == 0);
252 
253       for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)
254          assert(dout.equation.gfx10_bits[i] == 0);
255    }
256 
257    for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {
258       for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {
259          for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {
260             for (in.sample = start_sample; in.sample < samples; in.sample++) {
261                int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);
262                if (r != ADDR_OK) {
263                   printf("%s addrlib error: %s\n", name, test);
264                   abort();
265                }
266 
267                unsigned addr;
268                if (info->gfx_level == GFX9) {
269                   addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
270                                                    dout.metaBlkDepth, dout.pitch, dout.height,
271                                                    in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
272                   if (in.sample == 1) {
273                      /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */
274                      assert(addr - 1 ==
275                             gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
276                                                       dout.metaBlkDepth, dout.pitch, dout.height,
277                                                       in.x, in.y, in.slice, 0, in.pipeXor, NULL));
278                   }
279                } else {
280                   addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,
281                                                    in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,
282                                                    dout.pitch, dout.dccRamSliceSize,
283                                                    in.x, in.y, in.slice, in.pipeXor);
284                }
285 
286                if (out.addr != addr) {
287                   printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",
288                          name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);
289                   return false;
290                }
291             }
292          }
293       }
294    }
295    return true;
296 }
297 
run_dcc_address_test(const char * name,const struct radeon_info * info,bool full)298 static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)
299 {
300    unsigned total = 0;
301    unsigned fails = 0;
302    unsigned last_size, max_samples, min_bpp, max_bpp;
303    unsigned swizzle_modes[2], num_swizzle_modes = 0;
304 
305    switch (info->gfx_level) {
306    case GFX9:
307       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_S_X;
308       break;
309    case GFX10:
310    case GFX10_3:
311       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
312       break;
313    case GFX11:
314       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
315       swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_R_X;
316       break;
317    default:
318       unreachable("unhandled gfx level");
319    }
320 
321    if (full) {
322       last_size = 6*6 - 1;
323       max_samples = 8;
324       min_bpp = 8;
325       max_bpp = 128;
326    } else {
327       /* The test coverage is reduced for Gitlab CI because it timeouts. */
328       last_size = 0;
329       max_samples = 2;
330       min_bpp = 32;
331       max_bpp = 64;
332    }
333 
334 #ifdef HAVE_OPENMP
335 #pragma omp parallel for
336 #endif
337    for (unsigned size = 0; size <= last_size; size++) {
338       unsigned width = 8 + 379 * (size % 6);
339       unsigned height = 8 + 379 * ((size / 6) % 6);
340 
341       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
342       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
343 
344       unsigned local_fails = 0;
345       unsigned local_total = 0;
346 
347       for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
348          for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
349             /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
350             for (unsigned samples = 1; samples <= (info->gfx_level == GFX9 ? max_samples : 1); samples *= 2) {
351                for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
352                   for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
353                      for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
354                         unsigned depth = 2;
355                         char test[256];
356 
357                         snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",
358                                  width, height, depth, bpp, samples, rb_aligned, pipe_aligned);
359 
360                         if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,
361                                                  bpp, swizzle_modes[swizzle_mode], pipe_aligned,
362                                                  rb_aligned, mrt_index, 0, 0, 0, 0)) {
363                         } else {
364                            local_fails++;
365                         }
366                         local_total++;
367                      }
368                   }
369                }
370             }
371          }
372       }
373 
374       ac_addrlib_destroy(ac_addrlib);
375       p_atomic_add(&fails, local_fails);
376       p_atomic_add(&total, local_total);
377    }
378    printf("%16s total: %u, fail: %u\n", name, total, fails);
379 }
380 
381 /* HTILE address computation without mipmapping. */
gfx10_htile_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned htile_pitch,unsigned htile_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)382 static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,
383                                             const uint16_t *equation,
384                                             unsigned meta_block_width,
385                                             unsigned meta_block_height,
386                                             unsigned htile_pitch, unsigned htile_slice_size,
387                                             unsigned x, unsigned y, unsigned z,
388                                             unsigned pipe_xor)
389 {
390    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
391    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
392    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4;
393 
394    return gfx10_meta_addr_from_coord(info, equation,
395                                      meta_block_width, meta_block_height,
396                                      blkSizeLog2,
397                                      htile_pitch, htile_slice_size,
398                                      x, y, z, pipe_xor, NULL);
399 }
400 
one_htile_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,unsigned start_x,unsigned start_y,unsigned start_z)401 static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
402                                    const struct radeon_info *info,
403                                    unsigned width, unsigned height, unsigned depth,
404                                    unsigned bpp, unsigned swizzle_mode,
405                                    unsigned start_x, unsigned start_y, unsigned start_z)
406 {
407    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
408    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
409    ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
410    ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
411    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};
412    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};
413    ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
414 
415    hout.pMipInfo = meta_mip_info;
416 
417    /* Compute HTILE info. */
418    hin.hTileFlags.pipeAligned = 1;
419    hin.hTileFlags.rbAligned = 1;
420    hin.depthFlags.depth = 1;
421    hin.depthFlags.texture = 1;
422    hin.depthFlags.opt4space = 1;
423    hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;
424    hin.unalignedWidth = in.unalignedWidth = width;
425    hin.unalignedHeight = in.unalignedHeight = height;
426    hin.numSlices = in.numSlices = depth;
427    hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */
428    hin.firstMipIdInTail = 1;
429 
430    int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
431    assert(ret == ADDR_OK);
432 
433    /* Compute xor. */
434    static AddrFormat format[] = {
435       ADDR_FMT_8, /* unused */
436       ADDR_FMT_16,
437       ADDR_FMT_32,
438    };
439    xin.flags = hin.depthFlags;
440    xin.resourceType = ADDR_RSRC_TEX_2D;
441    xin.format = format[util_logbase2(bpp / 8)];
442    xin.numFrags = xin.numSamples = in.numSamples = 1;
443 
444    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
445    assert(ret == ADDR_OK);
446 
447    in.hTileFlags = hin.hTileFlags;
448    in.depthflags = xin.flags;
449    in.bpp = bpp;
450    in.pipeXor = xout.pipeBankXor;
451 
452    for (in.x = start_x; in.x < width; in.x++) {
453       for (in.y = start_y; in.y < height; in.y++) {
454          for (in.slice = start_z; in.slice < depth; in.slice++) {
455             int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);
456             if (r != ADDR_OK) {
457                printf("%s addrlib error: %s\n", name, test);
458                abort();
459             }
460 
461             unsigned addr =
462                gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,
463                                            hout.metaBlkWidth, hout.metaBlkHeight,
464                                            hout.pitch, hout.sliceSize,
465                                            in.x, in.y, in.slice, in.pipeXor);
466             if (out.addr != addr) {
467                printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",
468                       name, test, in.x, in.y, in.slice, out.addr, addr);
469                return false;
470             }
471          }
472       }
473    }
474 
475    return true;
476 }
477 
run_htile_address_test(const char * name,const struct radeon_info * info,bool full)478 static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)
479 {
480    unsigned total = 0;
481    unsigned fails = 0;
482    unsigned first_size = 0, last_size = 6*6 - 1;
483    unsigned swizzle_modes[2], num_swizzle_modes = 0;
484 
485    switch (info->gfx_level) {
486    case GFX9:
487    case GFX10:
488    case GFX10_3:
489       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
490       break;
491    case GFX11:
492       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
493       swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_Z_X;
494       break;
495    default:
496       unreachable("unhandled gfx level");
497    }
498 
499    /* The test coverage is reduced for Gitlab CI because it timeouts. */
500    if (!full) {
501       first_size = last_size = 0;
502    }
503 
504 #ifdef HAVE_OPENMP
505 #pragma omp parallel for
506 #endif
507    for (unsigned size = first_size; size <= last_size; size++) {
508       unsigned width = 8 + 379 * (size % 6);
509       unsigned height = 8 + 379 * (size / 6);
510 
511       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
512       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
513 
514       for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
515          for (unsigned depth = 1; depth <= 2; depth *= 2) {
516             for (unsigned bpp = 16; bpp <= 32; bpp *= 2) {
517                if (one_htile_address_test(name, name, addrlib, info, width, height, depth,
518                                           bpp, swizzle_modes[swizzle_mode], 0, 0, 0)) {
519                } else {
520                   p_atomic_inc(&fails);
521                }
522                p_atomic_inc(&total);
523             }
524          }
525       }
526 
527       ac_addrlib_destroy(ac_addrlib);
528    }
529    printf("%16s total: %u, fail: %u\n", name, total, fails);
530 }
531 
532 /* CMASK address computation without mipmapping and MSAA. */
gfx10_cmask_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned cmask_pitch,unsigned cmask_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)533 static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info,
534                                             /* Shader key inputs: */
535                                             /* equation varies with bpp and pipe_aligned */
536                                             const uint16_t *equation, unsigned bpp,
537                                             unsigned meta_block_width, unsigned meta_block_height,
538                                             /* Shader inputs: */
539                                             unsigned cmask_pitch, unsigned cmask_slice_size,
540                                             unsigned x, unsigned y, unsigned z,
541                                             unsigned pipe_xor,
542                                             /* Shader outputs: */
543                                             unsigned *bit_position)
544 
545 {
546    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
547    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
548    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 7;
549 
550    return gfx10_meta_addr_from_coord(info, equation,
551                                      meta_block_width, meta_block_height,
552                                      blkSizeLog2,
553                                      cmask_pitch, cmask_slice_size,
554                                      x, y, z, pipe_xor, bit_position);
555 }
556 
one_cmask_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z)557 static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
558                                    const struct radeon_info *info,
559                                    unsigned width, unsigned height, unsigned depth,
560                                    unsigned bpp, unsigned swizzle_mode,
561                                    bool pipe_aligned, bool rb_aligned, unsigned mrt_index,
562                                    unsigned start_x, unsigned start_y, unsigned start_z)
563 {
564    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)};
565    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)};
566    ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)};
567    ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)};
568    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)};
569    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
570 
571    /* Compute CMASK info. */
572    cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D;
573    cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode;
574    cin.unalignedWidth = in.unalignedWidth = width;
575    cin.unalignedHeight = in.unalignedHeight = height;
576    cin.numSlices = in.numSlices = depth;
577    cin.numMipLevels = 1;
578    cin.firstMipIdInTail = 1;
579    cin.cMaskFlags.pipeAligned = pipe_aligned;
580    cin.cMaskFlags.rbAligned = rb_aligned;
581    cin.cMaskFlags.linear = false;
582    cin.colorFlags.color = 1;
583    cin.colorFlags.texture = 1;
584    cin.colorFlags.opt4space = 1;
585    cin.colorFlags.metaRbUnaligned = !rb_aligned;
586    cin.colorFlags.metaPipeUnaligned = !pipe_aligned;
587 
588    int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout);
589    assert(ret == ADDR_OK);
590 
591    /* Compute xor. */
592    static AddrFormat format[] = {
593       ADDR_FMT_8,
594       ADDR_FMT_16,
595       ADDR_FMT_32,
596       ADDR_FMT_32_32,
597       ADDR_FMT_32_32_32_32,
598    };
599    xin.flags = cin.colorFlags;
600    xin.format = format[util_logbase2(bpp / 8)];
601    xin.surfIndex = mrt_index;
602    xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1;
603 
604    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
605    assert(ret == ADDR_OK);
606 
607    in.cMaskFlags = cin.cMaskFlags;
608    in.colorFlags = cin.colorFlags;
609    in.pipeXor = xout.pipeBankXor;
610 
611    for (in.x = start_x; in.x < width; in.x++) {
612       for (in.y = start_y; in.y < height; in.y++) {
613          for (in.slice = start_z; in.slice < depth; in.slice++) {
614             int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out);
615             if (r != ADDR_OK) {
616                printf("%s addrlib error: %s\n", name, test);
617                abort();
618             }
619 
620             unsigned addr, bit_position;
621 
622             if (info->gfx_level == GFX9) {
623                addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
624                                                 cout.metaBlkWidth, cout.metaBlkHeight, 1,
625                                                 cout.pitch, cout.height,
626                                                 in.x, in.y, in.slice, 0, in.pipeXor,
627                                                 &bit_position);
628             } else {
629                addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits,
630                                                   bpp, cout.metaBlkWidth,
631                                                   cout.metaBlkHeight,
632                                                   cout.pitch, cout.sliceSize,
633                                                   in.x, in.y, in.slice,
634                                                   in.pipeXor,
635                                                   &bit_position);
636             }
637 
638             if (out.addr != addr || out.bitPosition != bit_position) {
639                printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, "
640                       "expected (bit_position) = %u, got = %u\n",
641                       name, test, in.x, in.y, in.slice, out.addr, addr,
642                       out.bitPosition, bit_position);
643                return false;
644             }
645          }
646       }
647    }
648 
649    return true;
650 }
651 
run_cmask_address_test(const char * name,const struct radeon_info * info,bool full)652 static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full)
653 {
654    unsigned total = 0;
655    unsigned fails = 0;
656    unsigned swizzle_mode = info->gfx_level == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
657    unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
658 
659    /* GFX11 doesn't have CMASK. */
660    if (info->gfx_level >= GFX11)
661       return;
662 
663    /* The test coverage is reduced for Gitlab CI because it timeouts. */
664    if (!full) {
665       first_size = last_size = 0;
666    }
667 
668 #ifdef HAVE_OPENMP
669 #pragma omp parallel for
670 #endif
671    for (unsigned size = first_size; size <= last_size; size++) {
672       unsigned width = 8 + 379 * (size % 6);
673       unsigned height = 8 + 379 * (size / 6);
674 
675       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
676       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
677 
678       for (unsigned depth = 1; depth <= 2; depth *= 2) {
679          for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
680             for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) {
681                for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) {
682                   if (one_cmask_address_test(name, name, addrlib, info,
683                                              width, height, depth, bpp,
684                                              swizzle_mode,
685                                              pipe_aligned, rb_aligned,
686                                              0, 0, 0, 0)) {
687                   } else {
688                      p_atomic_inc(&fails);
689                   }
690                   p_atomic_inc(&total);
691                }
692             }
693          }
694       }
695 
696       ac_addrlib_destroy(ac_addrlib);
697    }
698    printf("%16s total: %u, fail: %u\n", name, total, fails);
699 }
700 
main(int argc,char ** argv)701 int main(int argc, char **argv)
702 {
703    bool full = false;
704 
705    if (argc == 2 && !strcmp(argv[1], "--full"))
706       full = true;
707    else
708       puts("Specify --full to run the full test.");
709 
710    puts("DCC:");
711    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
712       struct radeon_info info = get_radeon_info(&testcases[i]);
713 
714       if (info.gfx_level >= GFX12)
715          continue;
716 
717       run_dcc_address_test(testcases[i].name, &info, full);
718    }
719 
720    puts("HTILE:");
721    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
722       struct radeon_info info = get_radeon_info(&testcases[i]);
723 
724       /* Only GFX10+ is currently supported. GFX12 doesn't have HTILE. */
725       if (info.gfx_level < GFX10 || info.gfx_level >= GFX12)
726          continue;
727 
728       run_htile_address_test(testcases[i].name, &info, full);
729    }
730 
731    puts("CMASK:");
732    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
733       struct radeon_info info = get_radeon_info(&testcases[i]);
734 
735       if (info.gfx_level >= GFX11)
736          continue;
737 
738       run_cmask_address_test(testcases[i].name, &info, full);
739    }
740 
741    return 0;
742 }
743