1 /*
2 * Copyright © 2021 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * SPDX-License-Identifier: MIT
6 */
7
8 /* Make the test not meaningless when asserts are disabled. */
9 #undef NDEBUG
10
11 #include <assert.h>
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15
16 #include <amdgpu.h>
17 #include "drm-uapi/amdgpu_drm.h"
18 #include "drm-uapi/drm_fourcc.h"
19
20 #include "ac_surface.h"
21 #include "util/macros.h"
22 #include "util/u_atomic.h"
23 #include "util/u_math.h"
24 #include "util/u_vector.h"
25 #include "util/mesa-sha1.h"
26 #include "addrlib/inc/addrinterface.h"
27
28 #include "ac_surface_test_common.h"
29
30 /*
31 * The main goal of this test is to validate that our dcc/htile addressing
32 * functions match addrlib behavior.
33 */
34
35 /* DCC address computation without mipmapping.
36 * CMASK address computation without mipmapping and without multisampling.
37 */
gfx9_meta_addr_from_coord(const struct radeon_info * info,const struct gfx9_addr_meta_equation * eq,unsigned meta_block_width,unsigned meta_block_height,unsigned meta_block_depth,unsigned meta_pitch,unsigned meta_height,unsigned x,unsigned y,unsigned z,unsigned sample,unsigned pipe_xor,unsigned * bit_position)38 static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info,
39 /* Shader key inputs: */
40 /* equation varies with resource_type, swizzle_mode,
41 * bpp, number of fragments, pipe_aligned, rb_aligned */
42 const struct gfx9_addr_meta_equation *eq,
43 unsigned meta_block_width, unsigned meta_block_height,
44 unsigned meta_block_depth,
45 /* Shader inputs: */
46 unsigned meta_pitch, unsigned meta_height,
47 unsigned x, unsigned y, unsigned z,
48 unsigned sample, unsigned pipe_xor,
49 /* Shader outputs (CMASK only): */
50 unsigned *bit_position)
51 {
52 /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
53 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
54 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
55 unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);
56
57 unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
58 unsigned numPipeBits = eq->numPipeBits;
59 unsigned pitchInBlock = meta_pitch >> meta_block_width_log2;
60 unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock;
61
62 unsigned xb = x >> meta_block_width_log2;
63 unsigned yb = y >> meta_block_height_log2;
64 unsigned zb = z >> meta_block_depth_log2;
65
66 unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
67 unsigned coords[] = {x, y, z, sample, blockIndex};
68
69 unsigned address = 0;
70 unsigned num_bits = eq->num_bits;
71 assert(num_bits <= 32);
72
73 /* Compute the address up until the last bit that doesn't use the block index. */
74 for (unsigned b = 0; b < num_bits - 1; b++) {
75 unsigned xor = 0;
76 for (unsigned c = 0; c < 5; c++) {
77 if (eq->bit[b].coord[c].dim >= 5)
78 continue;
79
80 assert(eq->bit[b].coord[c].ord < 32);
81 unsigned ison = (coords[eq->bit[b].coord[c].dim] >>
82 eq->bit[b].coord[c].ord) & 0x1;
83
84 xor ^= ison;
85 }
86 address |= xor << b;
87 }
88
89 /* Fill the remaining bits with the block index. */
90 unsigned last = num_bits - 1;
91 address |= (blockIndex >> eq->bit[last].coord[0].ord) << last;
92
93 if (bit_position)
94 *bit_position = (address & 1) << 2;
95
96 unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);
97 return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
98 }
99
100 /* DCC/CMASK/HTILE address computation for GFX10. */
gfx10_meta_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned blkSizeLog2,unsigned meta_pitch,unsigned meta_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)101 static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,
102 /* Shader key inputs: */
103 const uint16_t *equation,
104 unsigned meta_block_width, unsigned meta_block_height,
105 unsigned blkSizeLog2,
106 /* Shader inputs: */
107 unsigned meta_pitch, unsigned meta_slice_size,
108 unsigned x, unsigned y, unsigned z,
109 unsigned pipe_xor,
110 /* Shader outputs: (CMASK only) */
111 unsigned *bit_position)
112 {
113 /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
114 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
115 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
116
117 unsigned coord[] = {x, y, z, 0};
118 unsigned address = 0;
119
120 for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {
121 unsigned v = 0;
122
123 for (unsigned c = 0; c < 4; c++) {
124 if (equation[i*4+c] != 0) {
125 unsigned mask = equation[i*4+c];
126 unsigned bits = coord[c];
127
128 while (mask)
129 v ^= (bits >> u_bit_scan(&mask)) & 0x1;
130 }
131 }
132
133 address |= v << i;
134 }
135
136 unsigned blkMask = (1 << blkSizeLog2) - 1;
137 unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
138 unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
139 unsigned xb = x >> meta_block_width_log2;
140 unsigned yb = y >> meta_block_height_log2;
141 unsigned pb = meta_pitch >> meta_block_width_log2;
142 unsigned blkIndex = (yb * pb) + xb;
143 unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
144
145 if (bit_position)
146 *bit_position = (address & 1) << 2;
147
148 return (meta_slice_size * z) +
149 (blkIndex * (1 << blkSizeLog2)) +
150 ((address >> 1) ^ pipeXor);
151 }
152
153 /* DCC address computation without mipmapping and MSAA. */
gfx10_dcc_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned dcc_pitch,unsigned dcc_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)154 static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,
155 /* Shader key inputs: */
156 /* equation varies with bpp and pipe_aligned */
157 const uint16_t *equation, unsigned bpp,
158 unsigned meta_block_width, unsigned meta_block_height,
159 /* Shader inputs: */
160 unsigned dcc_pitch, unsigned dcc_slice_size,
161 unsigned x, unsigned y, unsigned z,
162 unsigned pipe_xor)
163 {
164 unsigned bpp_log2 = util_logbase2(bpp >> 3);
165 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
166 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
167 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8;
168
169 return gfx10_meta_addr_from_coord(info, equation,
170 meta_block_width, meta_block_height,
171 blkSizeLog2,
172 dcc_pitch, dcc_slice_size,
173 x, y, z, pipe_xor, NULL);
174 }
175
one_dcc_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned samples,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z,unsigned start_sample)176 static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
177 const struct radeon_info *info, unsigned width, unsigned height,
178 unsigned depth, unsigned samples, unsigned bpp,
179 unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,
180 unsigned mrt_index,
181 unsigned start_x, unsigned start_y, unsigned start_z,
182 unsigned start_sample)
183 {
184 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};
185 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};
186 ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};
187 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};
188 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};
189 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
190 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
191
192 dout.pMipInfo = meta_mip_info;
193
194 /* Compute DCC info. */
195 in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;
196 in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;
197 xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;
198 xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;
199 in.bpp = din.bpp = bpp;
200 xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;
201 in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */
202 din.unalignedWidth = width;
203 din.unalignedHeight = height;
204 din.numSlices = depth;
205 din.firstMipIdInTail = 1;
206
207 int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
208 assert(ret == ADDR_OK);
209
210 /* Compute xor. */
211 static AddrFormat format[] = {
212 ADDR_FMT_8,
213 ADDR_FMT_16,
214 ADDR_FMT_32,
215 ADDR_FMT_32_32,
216 ADDR_FMT_32_32_32_32,
217 };
218 xin.flags.color = 1;
219 xin.flags.texture = 1;
220 xin.flags.opt4space = 1;
221 xin.flags.metaRbUnaligned = !rb_aligned;
222 xin.flags.metaPipeUnaligned = !pipe_aligned;
223 xin.format = format[util_logbase2(bpp / 8)];
224 xin.surfIndex = mrt_index;
225
226 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
227 assert(ret == ADDR_OK);
228
229 /* Compute addresses */
230 in.compressBlkWidth = dout.compressBlkWidth;
231 in.compressBlkHeight = dout.compressBlkHeight;
232 in.compressBlkDepth = dout.compressBlkDepth;
233 in.metaBlkWidth = dout.metaBlkWidth;
234 in.metaBlkHeight = dout.metaBlkHeight;
235 in.metaBlkDepth = dout.metaBlkDepth;
236 in.dccRamSliceSize = dout.dccRamSliceSize;
237
238 in.mipId = 0;
239 in.pitch = dout.pitch;
240 in.height = dout.height;
241 in.pipeXor = xout.pipeBankXor;
242
243 /* Validate that the packed gfx9_meta_equation structure can fit all fields. */
244 const struct gfx9_meta_equation eq;
245 if (info->gfx_level == GFX9) {
246 /* The bit array is smaller in gfx9_meta_equation than in addrlib. */
247 assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
248 } else {
249 /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
250 for (unsigned i = 0; i < 4; i++)
251 assert(dout.equation.gfx10_bits[i] == 0);
252
253 for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)
254 assert(dout.equation.gfx10_bits[i] == 0);
255 }
256
257 for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {
258 for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {
259 for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {
260 for (in.sample = start_sample; in.sample < samples; in.sample++) {
261 int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);
262 if (r != ADDR_OK) {
263 printf("%s addrlib error: %s\n", name, test);
264 abort();
265 }
266
267 unsigned addr;
268 if (info->gfx_level == GFX9) {
269 addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
270 dout.metaBlkDepth, dout.pitch, dout.height,
271 in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
272 if (in.sample == 1) {
273 /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */
274 assert(addr - 1 ==
275 gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
276 dout.metaBlkDepth, dout.pitch, dout.height,
277 in.x, in.y, in.slice, 0, in.pipeXor, NULL));
278 }
279 } else {
280 addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,
281 in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,
282 dout.pitch, dout.dccRamSliceSize,
283 in.x, in.y, in.slice, in.pipeXor);
284 }
285
286 if (out.addr != addr) {
287 printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",
288 name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);
289 return false;
290 }
291 }
292 }
293 }
294 }
295 return true;
296 }
297
run_dcc_address_test(const char * name,const struct radeon_info * info,bool full)298 static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)
299 {
300 unsigned total = 0;
301 unsigned fails = 0;
302 unsigned last_size, max_samples, min_bpp, max_bpp;
303 unsigned swizzle_modes[2], num_swizzle_modes = 0;
304
305 switch (info->gfx_level) {
306 case GFX9:
307 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_S_X;
308 break;
309 case GFX10:
310 case GFX10_3:
311 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
312 break;
313 case GFX11:
314 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
315 swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_R_X;
316 break;
317 default:
318 unreachable("unhandled gfx level");
319 }
320
321 if (full) {
322 last_size = 6*6 - 1;
323 max_samples = 8;
324 min_bpp = 8;
325 max_bpp = 128;
326 } else {
327 /* The test coverage is reduced for Gitlab CI because it timeouts. */
328 last_size = 0;
329 max_samples = 2;
330 min_bpp = 32;
331 max_bpp = 64;
332 }
333
334 #ifdef HAVE_OPENMP
335 #pragma omp parallel for
336 #endif
337 for (unsigned size = 0; size <= last_size; size++) {
338 unsigned width = 8 + 379 * (size % 6);
339 unsigned height = 8 + 379 * ((size / 6) % 6);
340
341 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
342 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
343
344 unsigned local_fails = 0;
345 unsigned local_total = 0;
346
347 for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
348 for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
349 /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
350 for (unsigned samples = 1; samples <= (info->gfx_level == GFX9 ? max_samples : 1); samples *= 2) {
351 for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
352 for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
353 for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
354 unsigned depth = 2;
355 char test[256];
356
357 snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",
358 width, height, depth, bpp, samples, rb_aligned, pipe_aligned);
359
360 if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,
361 bpp, swizzle_modes[swizzle_mode], pipe_aligned,
362 rb_aligned, mrt_index, 0, 0, 0, 0)) {
363 } else {
364 local_fails++;
365 }
366 local_total++;
367 }
368 }
369 }
370 }
371 }
372 }
373
374 ac_addrlib_destroy(ac_addrlib);
375 p_atomic_add(&fails, local_fails);
376 p_atomic_add(&total, local_total);
377 }
378 printf("%16s total: %u, fail: %u\n", name, total, fails);
379 }
380
381 /* HTILE address computation without mipmapping. */
gfx10_htile_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned htile_pitch,unsigned htile_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)382 static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,
383 const uint16_t *equation,
384 unsigned meta_block_width,
385 unsigned meta_block_height,
386 unsigned htile_pitch, unsigned htile_slice_size,
387 unsigned x, unsigned y, unsigned z,
388 unsigned pipe_xor)
389 {
390 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
391 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
392 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4;
393
394 return gfx10_meta_addr_from_coord(info, equation,
395 meta_block_width, meta_block_height,
396 blkSizeLog2,
397 htile_pitch, htile_slice_size,
398 x, y, z, pipe_xor, NULL);
399 }
400
one_htile_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,unsigned start_x,unsigned start_y,unsigned start_z)401 static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
402 const struct radeon_info *info,
403 unsigned width, unsigned height, unsigned depth,
404 unsigned bpp, unsigned swizzle_mode,
405 unsigned start_x, unsigned start_y, unsigned start_z)
406 {
407 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
408 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
409 ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
410 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
411 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};
412 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};
413 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
414
415 hout.pMipInfo = meta_mip_info;
416
417 /* Compute HTILE info. */
418 hin.hTileFlags.pipeAligned = 1;
419 hin.hTileFlags.rbAligned = 1;
420 hin.depthFlags.depth = 1;
421 hin.depthFlags.texture = 1;
422 hin.depthFlags.opt4space = 1;
423 hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;
424 hin.unalignedWidth = in.unalignedWidth = width;
425 hin.unalignedHeight = in.unalignedHeight = height;
426 hin.numSlices = in.numSlices = depth;
427 hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */
428 hin.firstMipIdInTail = 1;
429
430 int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
431 assert(ret == ADDR_OK);
432
433 /* Compute xor. */
434 static AddrFormat format[] = {
435 ADDR_FMT_8, /* unused */
436 ADDR_FMT_16,
437 ADDR_FMT_32,
438 };
439 xin.flags = hin.depthFlags;
440 xin.resourceType = ADDR_RSRC_TEX_2D;
441 xin.format = format[util_logbase2(bpp / 8)];
442 xin.numFrags = xin.numSamples = in.numSamples = 1;
443
444 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
445 assert(ret == ADDR_OK);
446
447 in.hTileFlags = hin.hTileFlags;
448 in.depthflags = xin.flags;
449 in.bpp = bpp;
450 in.pipeXor = xout.pipeBankXor;
451
452 for (in.x = start_x; in.x < width; in.x++) {
453 for (in.y = start_y; in.y < height; in.y++) {
454 for (in.slice = start_z; in.slice < depth; in.slice++) {
455 int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);
456 if (r != ADDR_OK) {
457 printf("%s addrlib error: %s\n", name, test);
458 abort();
459 }
460
461 unsigned addr =
462 gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,
463 hout.metaBlkWidth, hout.metaBlkHeight,
464 hout.pitch, hout.sliceSize,
465 in.x, in.y, in.slice, in.pipeXor);
466 if (out.addr != addr) {
467 printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",
468 name, test, in.x, in.y, in.slice, out.addr, addr);
469 return false;
470 }
471 }
472 }
473 }
474
475 return true;
476 }
477
run_htile_address_test(const char * name,const struct radeon_info * info,bool full)478 static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)
479 {
480 unsigned total = 0;
481 unsigned fails = 0;
482 unsigned first_size = 0, last_size = 6*6 - 1;
483 unsigned swizzle_modes[2], num_swizzle_modes = 0;
484
485 switch (info->gfx_level) {
486 case GFX9:
487 case GFX10:
488 case GFX10_3:
489 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
490 break;
491 case GFX11:
492 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
493 swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_Z_X;
494 break;
495 default:
496 unreachable("unhandled gfx level");
497 }
498
499 /* The test coverage is reduced for Gitlab CI because it timeouts. */
500 if (!full) {
501 first_size = last_size = 0;
502 }
503
504 #ifdef HAVE_OPENMP
505 #pragma omp parallel for
506 #endif
507 for (unsigned size = first_size; size <= last_size; size++) {
508 unsigned width = 8 + 379 * (size % 6);
509 unsigned height = 8 + 379 * (size / 6);
510
511 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
512 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
513
514 for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
515 for (unsigned depth = 1; depth <= 2; depth *= 2) {
516 for (unsigned bpp = 16; bpp <= 32; bpp *= 2) {
517 if (one_htile_address_test(name, name, addrlib, info, width, height, depth,
518 bpp, swizzle_modes[swizzle_mode], 0, 0, 0)) {
519 } else {
520 p_atomic_inc(&fails);
521 }
522 p_atomic_inc(&total);
523 }
524 }
525 }
526
527 ac_addrlib_destroy(ac_addrlib);
528 }
529 printf("%16s total: %u, fail: %u\n", name, total, fails);
530 }
531
532 /* CMASK address computation without mipmapping and MSAA. */
gfx10_cmask_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned cmask_pitch,unsigned cmask_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)533 static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info,
534 /* Shader key inputs: */
535 /* equation varies with bpp and pipe_aligned */
536 const uint16_t *equation, unsigned bpp,
537 unsigned meta_block_width, unsigned meta_block_height,
538 /* Shader inputs: */
539 unsigned cmask_pitch, unsigned cmask_slice_size,
540 unsigned x, unsigned y, unsigned z,
541 unsigned pipe_xor,
542 /* Shader outputs: */
543 unsigned *bit_position)
544
545 {
546 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
547 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
548 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 7;
549
550 return gfx10_meta_addr_from_coord(info, equation,
551 meta_block_width, meta_block_height,
552 blkSizeLog2,
553 cmask_pitch, cmask_slice_size,
554 x, y, z, pipe_xor, bit_position);
555 }
556
one_cmask_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z)557 static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
558 const struct radeon_info *info,
559 unsigned width, unsigned height, unsigned depth,
560 unsigned bpp, unsigned swizzle_mode,
561 bool pipe_aligned, bool rb_aligned, unsigned mrt_index,
562 unsigned start_x, unsigned start_y, unsigned start_z)
563 {
564 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)};
565 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)};
566 ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)};
567 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)};
568 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)};
569 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
570
571 /* Compute CMASK info. */
572 cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D;
573 cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode;
574 cin.unalignedWidth = in.unalignedWidth = width;
575 cin.unalignedHeight = in.unalignedHeight = height;
576 cin.numSlices = in.numSlices = depth;
577 cin.numMipLevels = 1;
578 cin.firstMipIdInTail = 1;
579 cin.cMaskFlags.pipeAligned = pipe_aligned;
580 cin.cMaskFlags.rbAligned = rb_aligned;
581 cin.cMaskFlags.linear = false;
582 cin.colorFlags.color = 1;
583 cin.colorFlags.texture = 1;
584 cin.colorFlags.opt4space = 1;
585 cin.colorFlags.metaRbUnaligned = !rb_aligned;
586 cin.colorFlags.metaPipeUnaligned = !pipe_aligned;
587
588 int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout);
589 assert(ret == ADDR_OK);
590
591 /* Compute xor. */
592 static AddrFormat format[] = {
593 ADDR_FMT_8,
594 ADDR_FMT_16,
595 ADDR_FMT_32,
596 ADDR_FMT_32_32,
597 ADDR_FMT_32_32_32_32,
598 };
599 xin.flags = cin.colorFlags;
600 xin.format = format[util_logbase2(bpp / 8)];
601 xin.surfIndex = mrt_index;
602 xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1;
603
604 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
605 assert(ret == ADDR_OK);
606
607 in.cMaskFlags = cin.cMaskFlags;
608 in.colorFlags = cin.colorFlags;
609 in.pipeXor = xout.pipeBankXor;
610
611 for (in.x = start_x; in.x < width; in.x++) {
612 for (in.y = start_y; in.y < height; in.y++) {
613 for (in.slice = start_z; in.slice < depth; in.slice++) {
614 int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out);
615 if (r != ADDR_OK) {
616 printf("%s addrlib error: %s\n", name, test);
617 abort();
618 }
619
620 unsigned addr, bit_position;
621
622 if (info->gfx_level == GFX9) {
623 addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
624 cout.metaBlkWidth, cout.metaBlkHeight, 1,
625 cout.pitch, cout.height,
626 in.x, in.y, in.slice, 0, in.pipeXor,
627 &bit_position);
628 } else {
629 addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits,
630 bpp, cout.metaBlkWidth,
631 cout.metaBlkHeight,
632 cout.pitch, cout.sliceSize,
633 in.x, in.y, in.slice,
634 in.pipeXor,
635 &bit_position);
636 }
637
638 if (out.addr != addr || out.bitPosition != bit_position) {
639 printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, "
640 "expected (bit_position) = %u, got = %u\n",
641 name, test, in.x, in.y, in.slice, out.addr, addr,
642 out.bitPosition, bit_position);
643 return false;
644 }
645 }
646 }
647 }
648
649 return true;
650 }
651
run_cmask_address_test(const char * name,const struct radeon_info * info,bool full)652 static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full)
653 {
654 unsigned total = 0;
655 unsigned fails = 0;
656 unsigned swizzle_mode = info->gfx_level == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
657 unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
658
659 /* GFX11 doesn't have CMASK. */
660 if (info->gfx_level >= GFX11)
661 return;
662
663 /* The test coverage is reduced for Gitlab CI because it timeouts. */
664 if (!full) {
665 first_size = last_size = 0;
666 }
667
668 #ifdef HAVE_OPENMP
669 #pragma omp parallel for
670 #endif
671 for (unsigned size = first_size; size <= last_size; size++) {
672 unsigned width = 8 + 379 * (size % 6);
673 unsigned height = 8 + 379 * (size / 6);
674
675 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
676 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
677
678 for (unsigned depth = 1; depth <= 2; depth *= 2) {
679 for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
680 for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) {
681 for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) {
682 if (one_cmask_address_test(name, name, addrlib, info,
683 width, height, depth, bpp,
684 swizzle_mode,
685 pipe_aligned, rb_aligned,
686 0, 0, 0, 0)) {
687 } else {
688 p_atomic_inc(&fails);
689 }
690 p_atomic_inc(&total);
691 }
692 }
693 }
694 }
695
696 ac_addrlib_destroy(ac_addrlib);
697 }
698 printf("%16s total: %u, fail: %u\n", name, total, fails);
699 }
700
main(int argc,char ** argv)701 int main(int argc, char **argv)
702 {
703 bool full = false;
704
705 if (argc == 2 && !strcmp(argv[1], "--full"))
706 full = true;
707 else
708 puts("Specify --full to run the full test.");
709
710 puts("DCC:");
711 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
712 struct radeon_info info = get_radeon_info(&testcases[i]);
713
714 if (info.gfx_level >= GFX12)
715 continue;
716
717 run_dcc_address_test(testcases[i].name, &info, full);
718 }
719
720 puts("HTILE:");
721 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
722 struct radeon_info info = get_radeon_info(&testcases[i]);
723
724 /* Only GFX10+ is currently supported. GFX12 doesn't have HTILE. */
725 if (info.gfx_level < GFX10 || info.gfx_level >= GFX12)
726 continue;
727
728 run_htile_address_test(testcases[i].name, &info, full);
729 }
730
731 puts("CMASK:");
732 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
733 struct radeon_info info = get_radeon_info(&testcases[i]);
734
735 if (info.gfx_level >= GFX11)
736 continue;
737
738 run_cmask_address_test(testcases[i].name, &info, full);
739 }
740
741 return 0;
742 }
743