1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx10addrlib.cpp
12 * @brief Contain the implementation for the Gfx10Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx10addrlib.h"
17 #include "addrcommon.h"
18 #include "gfx10_gb_reg.h"
19
20 #include "amdgpu_asic_addr.h"
21
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 * Gfx10HwlInit
30 *
31 * @brief
32 * Creates a Gfx10Lib object.
33 *
34 * @return
35 * Returns a Gfx10Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx10HwlInit(const Client * pClient)38 Addr::Lib* Gfx10HwlInit(const Client* pClient)
39 {
40 return V2::Gfx10Lib::CreateObj(pClient);
41 }
42
43 namespace V2
44 {
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 // Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49
50 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
51 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
52 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
53 {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
54 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
55 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
56
57 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
58 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
59 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
60 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
61
62 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
63 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
64 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
65 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
66
67 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
68 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
71
72 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
73 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
74 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
75 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
76
77 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
78 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X
79 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X
80 {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X
81
82 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
83 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
84 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
85 {{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_64KB_R_X
86
87 {{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_VAR_Z_X
88 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
89 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
90 {{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_VAR_R_X
91 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
92 };
93
94 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
95
96 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
97 const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
98
99 /**
100 ************************************************************************************************************************
101 * Gfx10Lib::Gfx10Lib
102 *
103 * @brief
104 * Constructor
105 *
106 ************************************************************************************************************************
107 */
Gfx10Lib(const Client * pClient)108 Gfx10Lib::Gfx10Lib(const Client* pClient)
109 :
110 Lib(pClient),
111 m_numPkrLog2(0),
112 m_numSaLog2(0),
113 m_colorBaseIndex(0),
114 m_xmaskBaseIndex(0),
115 m_htileBaseIndex(0),
116 m_dccBaseIndex(0)
117 {
118 memset(&m_settings, 0, sizeof(m_settings));
119 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
120 }
121
122 /**
123 ************************************************************************************************************************
124 * Gfx10Lib::~Gfx10Lib
125 *
126 * @brief
127 * Destructor
128 ************************************************************************************************************************
129 */
~Gfx10Lib()130 Gfx10Lib::~Gfx10Lib()
131 {
132 }
133
134 /**
135 ************************************************************************************************************************
136 * Gfx10Lib::HwlComputeHtileInfo
137 *
138 * @brief
139 * Interface function stub of AddrComputeHtileInfo
140 *
141 * @return
142 * ADDR_E_RETURNCODE
143 ************************************************************************************************************************
144 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const145 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
146 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
147 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
148 ) const
149 {
150 ADDR_E_RETURNCODE ret = ADDR_OK;
151
152 if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
153 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
154 (pIn->hTileFlags.pipeAligned != TRUE))
155 {
156 ret = ADDR_INVALIDPARAMS;
157 }
158 else
159 {
160 Dim3d metaBlk = {};
161 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
162 ADDR_RSRC_TEX_2D,
163 pIn->swizzleMode,
164 0,
165 0,
166 TRUE,
167 &metaBlk);
168
169 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
170 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
171 pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
172 pOut->metaBlkWidth = metaBlk.w;
173 pOut->metaBlkHeight = metaBlk.h;
174
175 if (pIn->numMipLevels > 1)
176 {
177 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
178
179 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
180
181 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
182 {
183 UINT_32 mipWidth, mipHeight;
184
185 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
186
187 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
188 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
189
190 const UINT_32 pitchInM = mipWidth / metaBlk.w;
191 const UINT_32 heightInM = mipHeight / metaBlk.h;
192 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
193
194 if (pOut->pMipInfo != NULL)
195 {
196 pOut->pMipInfo[i].inMiptail = FALSE;
197 pOut->pMipInfo[i].offset = offset;
198 pOut->pMipInfo[i].sliceSize = mipSliceSize;
199 }
200
201 offset += mipSliceSize;
202 }
203
204 pOut->sliceSize = offset;
205 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
206 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
207
208 if (pOut->pMipInfo != NULL)
209 {
210 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
211 {
212 pOut->pMipInfo[i].inMiptail = TRUE;
213 pOut->pMipInfo[i].offset = 0;
214 pOut->pMipInfo[i].sliceSize = 0;
215 }
216
217 if (pIn->firstMipIdInTail != pIn->numMipLevels)
218 {
219 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
220 }
221 }
222 }
223 else
224 {
225 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
226 const UINT_32 heightInM = pOut->height / metaBlk.h;
227
228 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
229 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
230 pOut->htileBytes = pOut->sliceSize * pIn->numSlices;
231
232 if (pOut->pMipInfo != NULL)
233 {
234 pOut->pMipInfo[0].inMiptail = FALSE;
235 pOut->pMipInfo[0].offset = 0;
236 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
237 }
238 }
239
240 // Get the HTILE address equation (copied from HtileAddrFromCoord).
241 // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
242 const UINT_32 index = m_xmaskBaseIndex;
243 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
244
245 ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
246 pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
247 }
248
249 return ret;
250 }
251
252 /**
253 ************************************************************************************************************************
254 * Gfx10Lib::HwlComputeCmaskInfo
255 *
256 * @brief
257 * Interface function stub of AddrComputeCmaskInfo
258 *
259 * @return
260 * ADDR_E_RETURNCODE
261 ************************************************************************************************************************
262 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const263 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
264 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
265 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
266 ) const
267 {
268 ADDR_E_RETURNCODE ret = ADDR_OK;
269
270 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
271 (pIn->cMaskFlags.pipeAligned != TRUE) ||
272 ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
273 ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
274 {
275 ret = ADDR_INVALIDPARAMS;
276 }
277 else
278 {
279 Dim3d metaBlk = {};
280 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
281 ADDR_RSRC_TEX_2D,
282 pIn->swizzleMode,
283 0,
284 0,
285 TRUE,
286 &metaBlk);
287
288 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
289 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
290 pOut->baseAlign = metaBlkSize;
291 pOut->metaBlkWidth = metaBlk.w;
292 pOut->metaBlkHeight = metaBlk.h;
293
294 if (pIn->numMipLevels > 1)
295 {
296 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
297
298 UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
299
300 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
301 {
302 UINT_32 mipWidth, mipHeight;
303
304 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
305
306 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
307 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
308
309 const UINT_32 pitchInM = mipWidth / metaBlk.w;
310 const UINT_32 heightInM = mipHeight / metaBlk.h;
311
312 if (pOut->pMipInfo != NULL)
313 {
314 pOut->pMipInfo[i].inMiptail = FALSE;
315 pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;
316 pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
317 }
318
319 metaBlkPerSlice += pitchInM * heightInM;
320 }
321
322 pOut->metaBlkNumPerSlice = metaBlkPerSlice;
323
324 if (pOut->pMipInfo != NULL)
325 {
326 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
327 {
328 pOut->pMipInfo[i].inMiptail = TRUE;
329 pOut->pMipInfo[i].offset = 0;
330 pOut->pMipInfo[i].sliceSize = 0;
331 }
332
333 if (pIn->firstMipIdInTail != pIn->numMipLevels)
334 {
335 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
336 }
337 }
338 }
339 else
340 {
341 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
342 const UINT_32 heightInM = pOut->height / metaBlk.h;
343
344 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
345
346 if (pOut->pMipInfo != NULL)
347 {
348 pOut->pMipInfo[0].inMiptail = FALSE;
349 pOut->pMipInfo[0].offset = 0;
350 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
351 }
352 }
353
354 pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
355 pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
356
357 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
358 const UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
359 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
360 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
361 const UINT_8* patIdxTable =
362 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
363 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
364
365 ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
366 pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
367 }
368
369 return ret;
370 }
371
372 /**
373 ************************************************************************************************************************
374 * Gfx10Lib::HwlComputeDccInfo
375 *
376 * @brief
377 * Interface function to compute DCC key info
378 *
379 * @return
380 * ADDR_E_RETURNCODE
381 ************************************************************************************************************************
382 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const383 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
384 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
385 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
386 ) const
387 {
388 ADDR_E_RETURNCODE ret = ADDR_OK;
389
390 if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
391 {
392 // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
393 // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
394 ret = ADDR_INVALIDPARAMS;
395 }
396 else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
397 {
398 // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
399 ret = ADDR_INVALIDPARAMS;
400 }
401 else
402 {
403 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
404
405 {
406 // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
407 ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
408
409 const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
410
411 pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
412 pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
413 pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;
414 }
415
416 if (ret == ADDR_OK)
417 {
418 Dim3d metaBlk = {};
419 const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
420 const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
421 pIn->resourceType,
422 pIn->swizzleMode,
423 elemLog2,
424 numFragLog2,
425 pIn->dccKeyFlags.pipeAligned,
426 &metaBlk);
427
428 pOut->dccRamBaseAlign = metaBlkSize;
429 pOut->metaBlkWidth = metaBlk.w;
430 pOut->metaBlkHeight = metaBlk.h;
431 pOut->metaBlkDepth = metaBlk.d;
432 pOut->metaBlkSize = metaBlkSize;
433
434 pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);
435 pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
436 pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
437
438 if (pIn->numMipLevels > 1)
439 {
440 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
441
442 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
443
444 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
445 {
446 UINT_32 mipWidth, mipHeight;
447
448 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
449
450 mipWidth = PowTwoAlign(mipWidth, metaBlk.w);
451 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
452
453 const UINT_32 pitchInM = mipWidth / metaBlk.w;
454 const UINT_32 heightInM = mipHeight / metaBlk.h;
455 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
456
457 if (pOut->pMipInfo != NULL)
458 {
459 pOut->pMipInfo[i].inMiptail = FALSE;
460 pOut->pMipInfo[i].offset = offset;
461 pOut->pMipInfo[i].sliceSize = mipSliceSize;
462 }
463
464 offset += mipSliceSize;
465 }
466
467 pOut->dccRamSliceSize = offset;
468 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
469 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
470
471 if (pOut->pMipInfo != NULL)
472 {
473 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
474 {
475 pOut->pMipInfo[i].inMiptail = TRUE;
476 pOut->pMipInfo[i].offset = 0;
477 pOut->pMipInfo[i].sliceSize = 0;
478 }
479
480 if (pIn->firstMipIdInTail != pIn->numMipLevels)
481 {
482 pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
483 }
484 }
485 }
486 else
487 {
488 const UINT_32 pitchInM = pOut->pitch / metaBlk.w;
489 const UINT_32 heightInM = pOut->height / metaBlk.h;
490
491 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
492 pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
493 pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);
494
495 if (pOut->pMipInfo != NULL)
496 {
497 pOut->pMipInfo[0].inMiptail = FALSE;
498 pOut->pMipInfo[0].offset = 0;
499 pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
500 }
501 }
502
503 // Get the DCC address equation (copied from DccAddrFromCoord)
504 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
505 const UINT_32 numPipeLog2 = m_pipesLog2;
506 UINT_32 index = m_dccBaseIndex + elemLog2;
507 const UINT_8* patIdxTable;
508
509 if (m_settings.supportRbPlus)
510 {
511 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
512
513 if (pIn->dccKeyFlags.pipeAligned)
514 {
515 index += MaxNumOfBpp;
516
517 if (m_numPkrLog2 < 2)
518 {
519 index += m_pipesLog2 * MaxNumOfBpp;
520 }
521 else
522 {
523 // 4 groups for "m_numPkrLog2 < 2" case
524 index += 4 * MaxNumOfBpp;
525
526 const UINT_32 dccPipePerPkr = 3;
527
528 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
529 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
530 }
531 }
532 }
533 else
534 {
535 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
536
537 if (pIn->dccKeyFlags.pipeAligned)
538 {
539 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
540 }
541 else
542 {
543 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
544 }
545 }
546
547 ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
548 pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
549 }
550 }
551
552 return ret;
553 }
554
555 /**
556 ************************************************************************************************************************
557 * Gfx10Lib::HwlComputeCmaskAddrFromCoord
558 *
559 * @brief
560 * Interface function stub of AddrComputeCmaskAddrFromCoord
561 *
562 * @return
563 * ADDR_E_RETURNCODE
564 ************************************************************************************************************************
565 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)566 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
567 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
568 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
569 {
570 // Only support pipe aligned CMask
571 ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
572
573 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
574 input.size = sizeof(input);
575 input.cMaskFlags = pIn->cMaskFlags;
576 input.colorFlags = pIn->colorFlags;
577 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
578 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
579 input.numSlices = Max(pIn->numSlices, 1u);
580 input.swizzleMode = pIn->swizzleMode;
581 input.resourceType = pIn->resourceType;
582
583 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
584 output.size = sizeof(output);
585
586 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
587
588 if (returnCode == ADDR_OK)
589 {
590 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
591 const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
592 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
593 const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;
594 const UINT_8* patIdxTable =
595 (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
596 (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
597
598 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
599 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
600 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
601 blkSizeLog2 + 1, // +1 for nibble offset
602 pIn->x,
603 pIn->y,
604 pIn->slice,
605 0);
606 const UINT_32 xb = pIn->x / output.metaBlkWidth;
607 const UINT_32 yb = pIn->y / output.metaBlkHeight;
608 const UINT_32 pb = output.pitch / output.metaBlkWidth;
609 const UINT_32 blkIndex = (yb * pb) + xb;
610 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
611
612 pOut->addr = (output.sliceSize * pIn->slice) +
613 (blkIndex * (1 << blkSizeLog2)) +
614 ((blkOffset >> 1) ^ pipeXor);
615 pOut->bitPosition = (blkOffset & 1) << 2;
616 }
617
618 return returnCode;
619 }
620
621 /**
622 ************************************************************************************************************************
623 * Gfx10Lib::HwlComputeHtileAddrFromCoord
624 *
625 * @brief
626 * Interface function stub of AddrComputeHtileAddrFromCoord
627 *
628 * @return
629 * ADDR_E_RETURNCODE
630 ************************************************************************************************************************
631 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)632 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
633 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
634 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
635 {
636 ADDR_E_RETURNCODE returnCode = ADDR_OK;
637
638 if (pIn->numMipLevels > 1)
639 {
640 returnCode = ADDR_NOTIMPLEMENTED;
641 }
642 else
643 {
644 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
645 input.size = sizeof(input);
646 input.hTileFlags = pIn->hTileFlags;
647 input.depthFlags = pIn->depthflags;
648 input.swizzleMode = pIn->swizzleMode;
649 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
650 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
651 input.numSlices = Max(pIn->numSlices, 1u);
652 input.numMipLevels = 1;
653
654 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
655 output.size = sizeof(output);
656
657 returnCode = ComputeHtileInfo(&input, &output);
658
659 if (returnCode == ADDR_OK)
660 {
661 const UINT_32 numSampleLog2 = Log2(pIn->numSamples);
662 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
663 const UINT_32 index = m_htileBaseIndex + numSampleLog2;
664 const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
665
666 const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
667 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
668 const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
669 blkSizeLog2 + 1, // +1 for nibble offset
670 pIn->x,
671 pIn->y,
672 pIn->slice,
673 0);
674 const UINT_32 xb = pIn->x / output.metaBlkWidth;
675 const UINT_32 yb = pIn->y / output.metaBlkHeight;
676 const UINT_32 pb = output.pitch / output.metaBlkWidth;
677 const UINT_32 blkIndex = (yb * pb) + xb;
678 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
679
680 pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
681 (blkIndex * (1 << blkSizeLog2)) +
682 ((blkOffset >> 1) ^ pipeXor);
683 }
684 }
685
686 return returnCode;
687 }
688
689 /**
690 ************************************************************************************************************************
691 * Gfx10Lib::HwlComputeHtileCoordFromAddr
692 *
693 * @brief
694 * Interface function stub of AddrComputeHtileCoordFromAddr
695 *
696 * @return
697 * ADDR_E_RETURNCODE
698 ************************************************************************************************************************
699 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)700 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
701 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
702 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
703 {
704 ADDR_NOT_IMPLEMENTED();
705
706 return ADDR_OK;
707 }
708
709 /**
710 ************************************************************************************************************************
711 * Gfx10Lib::HwlSupportComputeDccAddrFromCoord
712 *
713 * @brief
714 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
715 *
716 * @return
717 * ADDR_E_RETURNCODE
718 ************************************************************************************************************************
719 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)720 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
721 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
722 {
723 ADDR_E_RETURNCODE returnCode = ADDR_OK;
724
725 if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
726 (pIn->swizzleMode != ADDR_SW_64KB_R_X) ||
727 (pIn->dccKeyFlags.linear == TRUE) ||
728 (pIn->numFrags > 1) ||
729 (pIn->numMipLevels > 1) ||
730 (pIn->mipId > 0))
731 {
732 returnCode = ADDR_NOTSUPPORTED;
733 }
734 else if ((pIn->pitch == 0) ||
735 (pIn->metaBlkWidth == 0) ||
736 (pIn->metaBlkHeight == 0) ||
737 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
738 {
739 returnCode = ADDR_NOTSUPPORTED;
740 }
741
742 return returnCode;
743 }
744
745 /**
746 ************************************************************************************************************************
747 * Gfx10Lib::HwlComputeDccAddrFromCoord
748 *
749 * @brief
750 * Interface function stub of AddrComputeDccAddrFromCoord
751 *
752 * @return
753 * N/A
754 ************************************************************************************************************************
755 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)756 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
757 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
758 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
759 {
760 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
761 const UINT_32 numPipeLog2 = m_pipesLog2;
762 const UINT_32 pipeMask = (1 << numPipeLog2) - 1;
763 UINT_32 index = m_dccBaseIndex + elemLog2;
764 const UINT_8* patIdxTable;
765
766 if (m_settings.supportRbPlus)
767 {
768 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
769
770 if (pIn->dccKeyFlags.pipeAligned)
771 {
772 index += MaxNumOfBpp;
773
774 if (m_numPkrLog2 < 2)
775 {
776 index += m_pipesLog2 * MaxNumOfBpp;
777 }
778 else
779 {
780 // 4 groups for "m_numPkrLog2 < 2" case
781 index += 4 * MaxNumOfBpp;
782
783 const UINT_32 dccPipePerPkr = 3;
784
785 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
786 (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
787 }
788 }
789 }
790 else
791 {
792 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
793
794 if (pIn->dccKeyFlags.pipeAligned)
795 {
796 index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
797 }
798 else
799 {
800 index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
801 }
802 }
803
804 const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
805 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
806 const UINT_32 blkOffset =
807 ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
808 blkSizeLog2 + 1, // +1 for nibble offset
809 pIn->x,
810 pIn->y,
811 pIn->slice,
812 0);
813 const UINT_32 xb = pIn->x / pIn->metaBlkWidth;
814 const UINT_32 yb = pIn->y / pIn->metaBlkHeight;
815 const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;
816 const UINT_32 blkIndex = (yb * pb) + xb;
817 const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
818
819 pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
820 (blkIndex * (1 << blkSizeLog2)) +
821 ((blkOffset >> 1) ^ pipeXor);
822 }
823
824 /**
825 ************************************************************************************************************************
826 * Gfx10Lib::HwlInitGlobalParams
827 *
828 * @brief
829 * Initializes global parameters
830 *
831 * @return
832 * TRUE if all settings are valid
833 *
834 ************************************************************************************************************************
835 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)836 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
837 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
838 {
839 BOOL_32 valid = TRUE;
840 GB_ADDR_CONFIG_GFX10 gbAddrConfig;
841
842 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
843
844 // These values are copied from CModel code
845 switch (gbAddrConfig.bits.NUM_PIPES)
846 {
847 case ADDR_CONFIG_1_PIPE:
848 m_pipes = 1;
849 m_pipesLog2 = 0;
850 break;
851 case ADDR_CONFIG_2_PIPE:
852 m_pipes = 2;
853 m_pipesLog2 = 1;
854 break;
855 case ADDR_CONFIG_4_PIPE:
856 m_pipes = 4;
857 m_pipesLog2 = 2;
858 break;
859 case ADDR_CONFIG_8_PIPE:
860 m_pipes = 8;
861 m_pipesLog2 = 3;
862 break;
863 case ADDR_CONFIG_16_PIPE:
864 m_pipes = 16;
865 m_pipesLog2 = 4;
866 break;
867 case ADDR_CONFIG_32_PIPE:
868 m_pipes = 32;
869 m_pipesLog2 = 5;
870 break;
871 case ADDR_CONFIG_64_PIPE:
872 m_pipes = 64;
873 m_pipesLog2 = 6;
874 break;
875 default:
876 ADDR_ASSERT_ALWAYS();
877 valid = FALSE;
878 break;
879 }
880
881 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
882 {
883 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
884 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
885 m_pipeInterleaveLog2 = 8;
886 break;
887 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
888 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
889 m_pipeInterleaveLog2 = 9;
890 break;
891 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
892 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
893 m_pipeInterleaveLog2 = 10;
894 break;
895 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
896 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
897 m_pipeInterleaveLog2 = 11;
898 break;
899 default:
900 ADDR_ASSERT_ALWAYS();
901 valid = FALSE;
902 break;
903 }
904
905 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
906 // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
907 // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
908 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
909
910 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
911 {
912 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
913 m_maxCompFrag = 1;
914 m_maxCompFragLog2 = 0;
915 break;
916 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
917 m_maxCompFrag = 2;
918 m_maxCompFragLog2 = 1;
919 break;
920 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
921 m_maxCompFrag = 4;
922 m_maxCompFragLog2 = 2;
923 break;
924 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
925 m_maxCompFrag = 8;
926 m_maxCompFragLog2 = 3;
927 break;
928 default:
929 ADDR_ASSERT_ALWAYS();
930 valid = FALSE;
931 break;
932 }
933
934 {
935 // Skip unaligned case
936 m_xmaskBaseIndex += MaxNumOfBppCMask;
937 m_htileBaseIndex += MaxNumOfAA;
938
939 m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask;
940 m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
941 m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
942
943 if (m_settings.supportRbPlus)
944 {
945 m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
946 m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
947
948 ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
949
950 ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
951 sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
952
953 if (m_numPkrLog2 >= 2)
954 {
955 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
956 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
957 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
958 }
959 }
960 else
961 {
962 const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
963 static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +
964 1;
965
966 ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
967 ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
968 (numPipeType + 1) * MaxNumOfBppCMask);
969 }
970 }
971
972 if (m_settings.supportRbPlus)
973 {
974 // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
975 // corresponding SW_64KB_* mode
976 m_blockVarSizeLog2 = m_pipesLog2 + 14;
977 }
978
979 if (valid)
980 {
981 InitEquationTable();
982 }
983
984 return valid;
985 }
986
987 /**
988 ************************************************************************************************************************
989 * Gfx10Lib::HwlConvertChipFamily
990 *
991 * @brief
992 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
993 * @return
994 * ChipFamily
995 ************************************************************************************************************************
996 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)997 ChipFamily Gfx10Lib::HwlConvertChipFamily(
998 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
999 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1000 {
1001 ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1002
1003 m_settings.dccUnsup3DSwDis = 1;
1004 m_settings.dsMipmapHtileFix = 1;
1005
1006 switch (chipFamily)
1007 {
1008 case FAMILY_NV:
1009 if (ASICREV_IS_NAVI10_P(chipRevision))
1010 {
1011 m_settings.dsMipmapHtileFix = 0;
1012 m_settings.isDcn20 = 1;
1013 }
1014
1015 if (ASICREV_IS_NAVI12_P(chipRevision))
1016 {
1017 m_settings.isDcn20 = 1;
1018 }
1019
1020 if (ASICREV_IS_NAVI14_M(chipRevision))
1021 {
1022 m_settings.isDcn20 = 1;
1023 }
1024
1025 if (ASICREV_IS_NAVI21_M(chipRevision))
1026 {
1027 m_settings.supportRbPlus = 1;
1028 m_settings.dccUnsup3DSwDis = 0;
1029 }
1030
1031 if (ASICREV_IS_NAVI22_P(chipRevision))
1032 {
1033 m_settings.supportRbPlus = 1;
1034 m_settings.dccUnsup3DSwDis = 0;
1035 }
1036
1037 if (ASICREV_IS_NAVI23_P(chipRevision))
1038 {
1039 m_settings.supportRbPlus = 1;
1040 m_settings.dccUnsup3DSwDis = 0;
1041 }
1042
1043 if (ASICREV_IS_NAVI24_P(chipRevision))
1044 {
1045 m_settings.supportRbPlus = 1;
1046 m_settings.dccUnsup3DSwDis = 0;
1047 }
1048 break;
1049
1050 case FAMILY_VGH:
1051 if (ASICREV_IS_VANGOGH(chipRevision))
1052 {
1053 m_settings.supportRbPlus = 1;
1054 m_settings.dccUnsup3DSwDis = 0;
1055 }
1056 else
1057 {
1058 ADDR_ASSERT(!"Unknown chip revision");
1059 }
1060 break;
1061 case FAMILY_RMB:
1062 if (ASICREV_IS_REMBRANDT(chipRevision))
1063 {
1064 m_settings.supportRbPlus = 1;
1065 m_settings.dccUnsup3DSwDis = 0;
1066 }
1067 else
1068 {
1069 ADDR_ASSERT(!"Unknown chip revision");
1070 }
1071 break;
1072 case FAMILY_RPL:
1073 if (ASICREV_IS_RAPHAEL(chipRevision))
1074 {
1075 m_settings.supportRbPlus = 1;
1076 m_settings.dccUnsup3DSwDis = 0;
1077 }
1078 break;
1079 case FAMILY_MDN:
1080 if (ASICREV_IS_MENDOCINO(chipRevision))
1081 {
1082 m_settings.supportRbPlus = 1;
1083 m_settings.dccUnsup3DSwDis = 0;
1084 }
1085 else
1086 {
1087 ADDR_ASSERT(!"Unknown chip revision");
1088 }
1089 break;
1090 default:
1091 ADDR_ASSERT(!"Unknown chip family");
1092 break;
1093 }
1094
1095 m_configFlags.use32bppFor422Fmt = TRUE;
1096
1097 return family;
1098 }
1099
1100 /**
1101 ************************************************************************************************************************
1102 * Gfx10Lib::GetBlk256SizeLog2
1103 *
1104 * @brief
1105 * Get block 256 size
1106 *
1107 * @return
1108 * N/A
1109 ************************************************************************************************************************
1110 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1111 void Gfx10Lib::GetBlk256SizeLog2(
1112 AddrResourceType resourceType, ///< [in] Resource type
1113 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1114 UINT_32 elemLog2, ///< [in] element size log2
1115 UINT_32 numSamplesLog2, ///< [in] number of samples
1116 Dim3d* pBlock ///< [out] block size
1117 ) const
1118 {
1119 if (IsThin(resourceType, swizzleMode))
1120 {
1121 UINT_32 blockBits = 8 - elemLog2;
1122
1123 if (IsZOrderSwizzle(swizzleMode))
1124 {
1125 blockBits -= numSamplesLog2;
1126 }
1127
1128 pBlock->w = (blockBits >> 1) + (blockBits & 1);
1129 pBlock->h = (blockBits >> 1);
1130 pBlock->d = 0;
1131 }
1132 else
1133 {
1134 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1135
1136 UINT_32 blockBits = 8 - elemLog2;
1137
1138 pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1139 pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1140 pBlock->h = (blockBits / 3);
1141 }
1142 }
1143
1144 /**
1145 ************************************************************************************************************************
1146 * Gfx10Lib::GetCompressedBlockSizeLog2
1147 *
1148 * @brief
1149 * Get compress block size
1150 *
1151 * @return
1152 * N/A
1153 ************************************************************************************************************************
1154 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1155 void Gfx10Lib::GetCompressedBlockSizeLog2(
1156 Gfx10DataType dataType, ///< [in] Data type
1157 AddrResourceType resourceType, ///< [in] Resource type
1158 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1159 UINT_32 elemLog2, ///< [in] element size log2
1160 UINT_32 numSamplesLog2, ///< [in] number of samples
1161 Dim3d* pBlock ///< [out] block size
1162 ) const
1163 {
1164 if (dataType == Gfx10DataColor)
1165 {
1166 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1167 }
1168 else
1169 {
1170 ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1171 pBlock->w = 3;
1172 pBlock->h = 3;
1173 pBlock->d = 0;
1174 }
1175 }
1176
1177 /**
1178 ************************************************************************************************************************
1179 * Gfx10Lib::GetMetaOverlapLog2
1180 *
1181 * @brief
1182 * Get meta block overlap
1183 *
1184 * @return
1185 * N/A
1186 ************************************************************************************************************************
1187 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1188 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1189 Gfx10DataType dataType, ///< [in] Data type
1190 AddrResourceType resourceType, ///< [in] Resource type
1191 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1192 UINT_32 elemLog2, ///< [in] element size log2
1193 UINT_32 numSamplesLog2 ///< [in] number of samples
1194 ) const
1195 {
1196 Dim3d compBlock;
1197 Dim3d microBlock;
1198
1199 GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1200 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);
1201
1202 const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1203 const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1204 const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1205 const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1206 INT_32 overlap = numPipesLog2 - maxSizeLog2;
1207
1208 if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1209 {
1210 overlap++;
1211 }
1212
1213 // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1214 if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1215 {
1216 overlap--;
1217 }
1218 overlap = Max(overlap, 0);
1219 return overlap;
1220 }
1221
1222 /**
1223 ************************************************************************************************************************
1224 * Gfx10Lib::Get3DMetaOverlapLog2
1225 *
1226 * @brief
1227 * Get 3d meta block overlap
1228 *
1229 * @return
1230 * N/A
1231 ************************************************************************************************************************
1232 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1233 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1234 AddrResourceType resourceType, ///< [in] Resource type
1235 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1236 UINT_32 elemLog2 ///< [in] element size log2
1237 ) const
1238 {
1239 Dim3d microBlock;
1240 GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);
1241
1242 INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1243
1244 if (m_settings.supportRbPlus)
1245 {
1246 overlap++;
1247 }
1248
1249 if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1250 {
1251 overlap = 0;
1252 }
1253 return overlap;
1254 }
1255
1256 /**
1257 ************************************************************************************************************************
1258 * Gfx10Lib::GetPipeRotateAmount
1259 *
1260 * @brief
1261 * Get pipe rotate amount
1262 *
1263 * @return
1264 * Pipe rotate amount
1265 ************************************************************************************************************************
1266 */
1267
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1268 INT_32 Gfx10Lib::GetPipeRotateAmount(
1269 AddrResourceType resourceType, ///< [in] Resource type
1270 AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1271 ) const
1272 {
1273 INT_32 amount = 0;
1274
1275 if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1276 {
1277 amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1278 1 : m_pipesLog2 - (m_numSaLog2 + 1);
1279 }
1280
1281 return amount;
1282 }
1283
1284 /**
1285 ************************************************************************************************************************
1286 * Gfx10Lib::GetMetaBlkSize
1287 *
1288 * @brief
1289 * Get metadata block size
1290 *
1291 * @return
1292 * Meta block size
1293 ************************************************************************************************************************
1294 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1295 UINT_32 Gfx10Lib::GetMetaBlkSize(
1296 Gfx10DataType dataType, ///< [in] Data type
1297 AddrResourceType resourceType, ///< [in] Resource type
1298 AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1299 UINT_32 elemLog2, ///< [in] element size log2
1300 UINT_32 numSamplesLog2, ///< [in] number of samples
1301 BOOL_32 pipeAlign, ///< [in] pipe align
1302 Dim3d* pBlock ///< [out] block size
1303 ) const
1304 {
1305 INT_32 metablkSizeLog2;
1306
1307 {
1308 const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1309 const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1310 const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1311 const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1312 numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1313 const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1314 INT_32 numPipesLog2 = m_pipesLog2;
1315
1316 if (IsThin(resourceType, swizzleMode))
1317 {
1318 if ((pipeAlign == FALSE) ||
1319 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1320 (IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1321 {
1322 if (pipeAlign)
1323 {
1324 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1325 metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1326 }
1327 else
1328 {
1329 metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1330 }
1331 }
1332 else
1333 {
1334 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1335 {
1336 numPipesLog2++;
1337 }
1338
1339 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1340
1341 if (numPipesLog2 >= 4)
1342 {
1343 INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1344
1345 // In 16Bpe 8xaa, we have an extra overlap bit
1346 if ((pipeRotateLog2 > 0) &&
1347 (elemLog2 == 4) &&
1348 (numSamplesLog2 == 3) &&
1349 (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1350 {
1351 overlapLog2++;
1352 }
1353
1354 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1355 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1356
1357 if (m_settings.supportRbPlus &&
1358 IsRtOptSwizzle(swizzleMode) &&
1359 (numPipesLog2 == 6) &&
1360 (numSamplesLog2 == 3) &&
1361 (m_maxCompFragLog2 == 3) &&
1362 (metablkSizeLog2 < 15))
1363 {
1364 metablkSizeLog2 = 15;
1365 }
1366 }
1367 else
1368 {
1369 metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1370 }
1371
1372 if (dataType == Gfx10DataDepthStencil)
1373 {
1374 // For htile surfaces, pad meta block size to 2K * num_pipes
1375 metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1376 }
1377
1378 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1379
1380 if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1381 {
1382 const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1383
1384 metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1385 }
1386 }
1387
1388 const INT_32 metablkBitsLog2 =
1389 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1390 pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1391 pBlock->h = 1 << (metablkBitsLog2 >> 1);
1392 pBlock->d = 1;
1393 }
1394 else
1395 {
1396 ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1397
1398 if (pipeAlign)
1399 {
1400 if (m_settings.supportRbPlus &&
1401 (m_pipesLog2 == m_numSaLog2 + 1) &&
1402 (m_pipesLog2 > 1) &&
1403 IsRbAligned(resourceType, swizzleMode))
1404 {
1405 numPipesLog2++;
1406 }
1407
1408 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1409
1410 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1411 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1412 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1413 }
1414 else
1415 {
1416 metablkSizeLog2 = 12;
1417 }
1418
1419 const INT_32 metablkBitsLog2 =
1420 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1421 pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1422 pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1423 pBlock->d = 1 << (metablkBitsLog2 / 3);
1424 }
1425 }
1426
1427 return (1 << static_cast<UINT_32>(metablkSizeLog2));
1428 }
1429
1430 /**
1431 ************************************************************************************************************************
1432 * Gfx10Lib::ConvertSwizzlePatternToEquation
1433 *
1434 * @brief
1435 * Convert swizzle pattern to equation.
1436 *
1437 * @return
1438 * N/A
1439 ************************************************************************************************************************
1440 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1441 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1442 UINT_32 elemLog2, ///< [in] element bytes log2
1443 AddrResourceType rsrcType, ///< [in] resource type
1444 AddrSwizzleMode swMode, ///< [in] swizzle mode
1445 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1446 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1447 const
1448 {
1449 // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1450 ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1451 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1452
1453 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1454 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1455 memset(pEquation, 0, sizeof(ADDR_EQUATION));
1456 pEquation->numBits = blockSizeLog2;
1457 pEquation->numBitComponents = pPatInfo->maxItemCount;
1458 pEquation->stackedDepthSlices = FALSE;
1459
1460 for (UINT_32 i = 0; i < elemLog2; i++)
1461 {
1462 pEquation->addr[i].channel = 0;
1463 pEquation->addr[i].valid = 1;
1464 pEquation->addr[i].index = i;
1465 }
1466
1467 if (IsXor(swMode) == FALSE)
1468 {
1469 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1470 {
1471 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1472
1473 if (pSwizzle[i].x != 0)
1474 {
1475 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1476
1477 pEquation->addr[i].channel = 0;
1478 pEquation->addr[i].valid = 1;
1479 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1480 }
1481 else if (pSwizzle[i].y != 0)
1482 {
1483 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1484
1485 pEquation->addr[i].channel = 1;
1486 pEquation->addr[i].valid = 1;
1487 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1488 }
1489 else
1490 {
1491 ADDR_ASSERT(pSwizzle[i].z != 0);
1492 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1493
1494 pEquation->addr[i].channel = 2;
1495 pEquation->addr[i].valid = 1;
1496 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1497 }
1498
1499 pEquation->xor1[i].value = 0;
1500 pEquation->xor2[i].value = 0;
1501 }
1502 }
1503 else if (IsThin(rsrcType, swMode))
1504 {
1505 Dim3d dim;
1506 ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1507
1508 const UINT_32 blkXLog2 = Log2(dim.w);
1509 const UINT_32 blkYLog2 = Log2(dim.h);
1510 const UINT_32 blkXMask = dim.w - 1;
1511 const UINT_32 blkYMask = dim.h - 1;
1512
1513 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1514 UINT_32 xMask = 0;
1515 UINT_32 yMask = 0;
1516 UINT_32 bMask = (1 << elemLog2) - 1;
1517
1518 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1519 {
1520 if (IsPow2(pSwizzle[i].value))
1521 {
1522 if (pSwizzle[i].x != 0)
1523 {
1524 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1525 xMask |= pSwizzle[i].x;
1526
1527 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1528
1529 ADDR_ASSERT(xLog2 < blkXLog2);
1530
1531 pEquation->addr[i].channel = 0;
1532 pEquation->addr[i].valid = 1;
1533 pEquation->addr[i].index = xLog2 + elemLog2;
1534 }
1535 else
1536 {
1537 ADDR_ASSERT(pSwizzle[i].y != 0);
1538 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1539 yMask |= pSwizzle[i].y;
1540
1541 pEquation->addr[i].channel = 1;
1542 pEquation->addr[i].valid = 1;
1543 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1544
1545 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1546 }
1547
1548 swizzle[i].value = 0;
1549 bMask |= 1 << i;
1550 }
1551 else
1552 {
1553 if (pSwizzle[i].z != 0)
1554 {
1555 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1556
1557 pEquation->xor2[i].channel = 2;
1558 pEquation->xor2[i].valid = 1;
1559 pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1560 }
1561
1562 swizzle[i].x = pSwizzle[i].x;
1563 swizzle[i].y = pSwizzle[i].y;
1564 swizzle[i].z = swizzle[i].s = 0;
1565
1566 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1567
1568 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1569
1570 if (xHi != 0)
1571 {
1572 ADDR_ASSERT(IsPow2(xHi));
1573 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1574
1575 pEquation->xor1[i].channel = 0;
1576 pEquation->xor1[i].valid = 1;
1577 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1578
1579 swizzle[i].x &= blkXMask;
1580 }
1581
1582 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1583
1584 if (yHi != 0)
1585 {
1586 ADDR_ASSERT(IsPow2(yHi));
1587
1588 if (xHi == 0)
1589 {
1590 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1591 pEquation->xor1[i].channel = 1;
1592 pEquation->xor1[i].valid = 1;
1593 pEquation->xor1[i].index = Log2(yHi);
1594 }
1595 else
1596 {
1597 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1598 pEquation->xor2[i].channel = 1;
1599 pEquation->xor2[i].valid = 1;
1600 pEquation->xor2[i].index = Log2(yHi);
1601 }
1602
1603 swizzle[i].y &= blkYMask;
1604 }
1605
1606 if (swizzle[i].value == 0)
1607 {
1608 bMask |= 1 << i;
1609 }
1610 }
1611 }
1612
1613 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1614 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1615
1616 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1617
1618 while (bMask != blockMask)
1619 {
1620 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1621 {
1622 if ((bMask & (1 << i)) == 0)
1623 {
1624 if (IsPow2(swizzle[i].value))
1625 {
1626 if (swizzle[i].x != 0)
1627 {
1628 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1629 xMask |= swizzle[i].x;
1630
1631 const UINT_32 xLog2 = Log2(swizzle[i].x);
1632
1633 ADDR_ASSERT(xLog2 < blkXLog2);
1634
1635 pEquation->addr[i].channel = 0;
1636 pEquation->addr[i].valid = 1;
1637 pEquation->addr[i].index = xLog2 + elemLog2;
1638 }
1639 else
1640 {
1641 ADDR_ASSERT(swizzle[i].y != 0);
1642 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1643 yMask |= swizzle[i].y;
1644
1645 pEquation->addr[i].channel = 1;
1646 pEquation->addr[i].valid = 1;
1647 pEquation->addr[i].index = Log2(swizzle[i].y);
1648
1649 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1650 }
1651
1652 swizzle[i].value = 0;
1653 bMask |= 1 << i;
1654 }
1655 else
1656 {
1657 const UINT_32 x = swizzle[i].x & xMask;
1658 const UINT_32 y = swizzle[i].y & yMask;
1659
1660 if (x != 0)
1661 {
1662 ADDR_ASSERT(IsPow2(x));
1663
1664 if (pEquation->xor1[i].value == 0)
1665 {
1666 pEquation->xor1[i].channel = 0;
1667 pEquation->xor1[i].valid = 1;
1668 pEquation->xor1[i].index = Log2(x) + elemLog2;
1669 }
1670 else
1671 {
1672 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1673 pEquation->xor2[i].channel = 0;
1674 pEquation->xor2[i].valid = 1;
1675 pEquation->xor2[i].index = Log2(x) + elemLog2;
1676 }
1677 }
1678
1679 if (y != 0)
1680 {
1681 ADDR_ASSERT(IsPow2(y));
1682
1683 if (pEquation->xor1[i].value == 0)
1684 {
1685 pEquation->xor1[i].channel = 1;
1686 pEquation->xor1[i].valid = 1;
1687 pEquation->xor1[i].index = Log2(y);
1688 }
1689 else
1690 {
1691 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1692 pEquation->xor2[i].channel = 1;
1693 pEquation->xor2[i].valid = 1;
1694 pEquation->xor2[i].index = Log2(y);
1695 }
1696 }
1697
1698 swizzle[i].x &= ~x;
1699 swizzle[i].y &= ~y;
1700 }
1701 }
1702 }
1703 }
1704
1705 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1706 }
1707 else
1708 {
1709 const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1710 const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1711 const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1712 const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1713 const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1714 const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1715
1716 ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1717 UINT_32 xMask = 0;
1718 UINT_32 yMask = 0;
1719 UINT_32 zMask = 0;
1720 UINT_32 bMask = (1 << elemLog2) - 1;
1721
1722 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1723 {
1724 if (IsPow2(pSwizzle[i].value))
1725 {
1726 if (pSwizzle[i].x != 0)
1727 {
1728 ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1729 xMask |= pSwizzle[i].x;
1730
1731 const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1732
1733 ADDR_ASSERT(xLog2 < blkXLog2);
1734
1735 pEquation->addr[i].channel = 0;
1736 pEquation->addr[i].valid = 1;
1737 pEquation->addr[i].index = xLog2 + elemLog2;
1738 }
1739 else if (pSwizzle[i].y != 0)
1740 {
1741 ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1742 yMask |= pSwizzle[i].y;
1743
1744 pEquation->addr[i].channel = 1;
1745 pEquation->addr[i].valid = 1;
1746 pEquation->addr[i].index = Log2(pSwizzle[i].y);
1747
1748 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1749 }
1750 else
1751 {
1752 ADDR_ASSERT(pSwizzle[i].z != 0);
1753 ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1754 zMask |= pSwizzle[i].z;
1755
1756 pEquation->addr[i].channel = 2;
1757 pEquation->addr[i].valid = 1;
1758 pEquation->addr[i].index = Log2(pSwizzle[i].z);
1759
1760 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1761 }
1762
1763 swizzle[i].value = 0;
1764 bMask |= 1 << i;
1765 }
1766 else
1767 {
1768 swizzle[i].x = pSwizzle[i].x;
1769 swizzle[i].y = pSwizzle[i].y;
1770 swizzle[i].z = pSwizzle[i].z;
1771 swizzle[i].s = 0;
1772
1773 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1774
1775 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1776 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1777 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1778
1779 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1780
1781 if (xHi != 0)
1782 {
1783 ADDR_ASSERT(IsPow2(xHi));
1784 ADDR_ASSERT(pEquation->xor1[i].value == 0);
1785
1786 pEquation->xor1[i].channel = 0;
1787 pEquation->xor1[i].valid = 1;
1788 pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1789
1790 swizzle[i].x &= blkXMask;
1791 }
1792
1793 if (yHi != 0)
1794 {
1795 ADDR_ASSERT(IsPow2(yHi));
1796
1797 if (pEquation->xor1[i].value == 0)
1798 {
1799 pEquation->xor1[i].channel = 1;
1800 pEquation->xor1[i].valid = 1;
1801 pEquation->xor1[i].index = Log2(yHi);
1802 }
1803 else
1804 {
1805 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1806 pEquation->xor2[i].channel = 1;
1807 pEquation->xor2[i].valid = 1;
1808 pEquation->xor2[i].index = Log2(yHi);
1809 }
1810
1811 swizzle[i].y &= blkYMask;
1812 }
1813
1814 if (zHi != 0)
1815 {
1816 ADDR_ASSERT(IsPow2(zHi));
1817
1818 if (pEquation->xor1[i].value == 0)
1819 {
1820 pEquation->xor1[i].channel = 2;
1821 pEquation->xor1[i].valid = 1;
1822 pEquation->xor1[i].index = Log2(zHi);
1823 }
1824 else
1825 {
1826 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1827 pEquation->xor2[i].channel = 2;
1828 pEquation->xor2[i].valid = 1;
1829 pEquation->xor2[i].index = Log2(zHi);
1830 }
1831
1832 swizzle[i].z &= blkZMask;
1833 }
1834
1835 if (swizzle[i].value == 0)
1836 {
1837 bMask |= 1 << i;
1838 }
1839 }
1840 }
1841
1842 const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1843 const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1844
1845 ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1846
1847 while (bMask != blockMask)
1848 {
1849 for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1850 {
1851 if ((bMask & (1 << i)) == 0)
1852 {
1853 if (IsPow2(swizzle[i].value))
1854 {
1855 if (swizzle[i].x != 0)
1856 {
1857 ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1858 xMask |= swizzle[i].x;
1859
1860 const UINT_32 xLog2 = Log2(swizzle[i].x);
1861
1862 ADDR_ASSERT(xLog2 < blkXLog2);
1863
1864 pEquation->addr[i].channel = 0;
1865 pEquation->addr[i].valid = 1;
1866 pEquation->addr[i].index = xLog2 + elemLog2;
1867 }
1868 else if (swizzle[i].y != 0)
1869 {
1870 ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1871 yMask |= swizzle[i].y;
1872
1873 pEquation->addr[i].channel = 1;
1874 pEquation->addr[i].valid = 1;
1875 pEquation->addr[i].index = Log2(swizzle[i].y);
1876
1877 ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1878 }
1879 else
1880 {
1881 ADDR_ASSERT(swizzle[i].z != 0);
1882 ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1883 zMask |= swizzle[i].z;
1884
1885 pEquation->addr[i].channel = 2;
1886 pEquation->addr[i].valid = 1;
1887 pEquation->addr[i].index = Log2(swizzle[i].z);
1888
1889 ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1890 }
1891
1892 swizzle[i].value = 0;
1893 bMask |= 1 << i;
1894 }
1895 else
1896 {
1897 const UINT_32 x = swizzle[i].x & xMask;
1898 const UINT_32 y = swizzle[i].y & yMask;
1899 const UINT_32 z = swizzle[i].z & zMask;
1900
1901 if (x != 0)
1902 {
1903 ADDR_ASSERT(IsPow2(x));
1904
1905 if (pEquation->xor1[i].value == 0)
1906 {
1907 pEquation->xor1[i].channel = 0;
1908 pEquation->xor1[i].valid = 1;
1909 pEquation->xor1[i].index = Log2(x) + elemLog2;
1910 }
1911 else
1912 {
1913 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1914 pEquation->xor2[i].channel = 0;
1915 pEquation->xor2[i].valid = 1;
1916 pEquation->xor2[i].index = Log2(x) + elemLog2;
1917 }
1918 }
1919
1920 if (y != 0)
1921 {
1922 ADDR_ASSERT(IsPow2(y));
1923
1924 if (pEquation->xor1[i].value == 0)
1925 {
1926 pEquation->xor1[i].channel = 1;
1927 pEquation->xor1[i].valid = 1;
1928 pEquation->xor1[i].index = Log2(y);
1929 }
1930 else
1931 {
1932 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1933 pEquation->xor2[i].channel = 1;
1934 pEquation->xor2[i].valid = 1;
1935 pEquation->xor2[i].index = Log2(y);
1936 }
1937 }
1938
1939 if (z != 0)
1940 {
1941 ADDR_ASSERT(IsPow2(z));
1942
1943 if (pEquation->xor1[i].value == 0)
1944 {
1945 pEquation->xor1[i].channel = 2;
1946 pEquation->xor1[i].valid = 1;
1947 pEquation->xor1[i].index = Log2(z);
1948 }
1949 else
1950 {
1951 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1952 pEquation->xor2[i].channel = 2;
1953 pEquation->xor2[i].valid = 1;
1954 pEquation->xor2[i].index = Log2(z);
1955 }
1956 }
1957
1958 swizzle[i].x &= ~x;
1959 swizzle[i].y &= ~y;
1960 swizzle[i].z &= ~z;
1961 }
1962 }
1963 }
1964 }
1965
1966 ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1967 }
1968 }
1969
1970 /**
1971 ************************************************************************************************************************
1972 * Gfx10Lib::InitEquationTable
1973 *
1974 * @brief
1975 * Initialize Equation table.
1976 *
1977 * @return
1978 * N/A
1979 ************************************************************************************************************************
1980 */
InitEquationTable()1981 VOID Gfx10Lib::InitEquationTable()
1982 {
1983 memset(m_equationTable, 0, sizeof(m_equationTable));
1984
1985 // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1986 // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1987 // computing 2D resources.
1988 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1989 {
1990 // Add offset. Start iterating from ADDR_RSRC_TEX_2D
1991 const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1992
1993 // Iterate through the maximum number of swizzlemodes a type can hold
1994 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1995 {
1996 const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1997
1998 // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
1999 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
2000 {
2001 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2002 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
2003 // overwriting the choice.
2004 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
2005
2006 if (pPatInfo != NULL)
2007 {
2008 ADDR_ASSERT(IsValidSwMode(swMode));
2009 if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
2010 {
2011 ADDR_EQUATION equation = {};
2012
2013 // Passing in pPatInfo to get the addr equation
2014 ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2015
2016 equationIndex = m_numEquations;
2017 ADDR_ASSERT(equationIndex < EquationTableSize);
2018 // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
2019 m_equationTable[equationIndex] = equation;
2020 // Increment m_numEquations
2021 m_numEquations++;
2022 }
2023 else // There is no equationIndex
2024 {
2025 // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2026 ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2027 ADDR_ASSERT(rsrcTypeIdx == 1);
2028 ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2029 ADDR_ASSERT(m_settings.supportRbPlus == 1);
2030 }
2031 }
2032 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
2033 // iteration in this nested for-loop
2034 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2035 }
2036 }
2037 }
2038 }
2039
2040 /**
2041 ************************************************************************************************************************
2042 * Gfx10Lib::HwlGetEquationIndex
2043 *
2044 * @brief
2045 * Interface function stub of GetEquationIndex
2046 *
2047 * @return
2048 * ADDR_E_RETURNCODE
2049 ************************************************************************************************************************
2050 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2051 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2052 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
2053 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
2054 ) const
2055 {
2056 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2057
2058 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2059 (pIn->resourceType == ADDR_RSRC_TEX_3D))
2060 {
2061 const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2062 const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);
2063 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
2064
2065 equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2066 }
2067
2068 if (pOut->pMipInfo != NULL)
2069 {
2070 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2071 {
2072 pOut->pMipInfo[i].equationIndex = equationIdx;
2073 }
2074 }
2075
2076 return equationIdx;
2077 }
2078
2079 /**
2080 ************************************************************************************************************************
2081 * Gfx10Lib::GetValidDisplaySwizzleModes
2082 *
2083 * @brief
2084 * Get valid swizzle modes mask for displayable surface
2085 *
2086 * @return
2087 * Valid swizzle modes mask for displayable surface
2088 ************************************************************************************************************************
2089 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2090 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2091 UINT_32 bpp
2092 ) const
2093 {
2094 UINT_32 swModeMask = 0;
2095
2096 if (bpp <= 64)
2097 {
2098 if (m_settings.isDcn20)
2099 {
2100 swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2101 }
2102 else
2103 {
2104 swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2105 }
2106 }
2107
2108 return swModeMask;
2109 }
2110
2111 /**
2112 ************************************************************************************************************************
2113 * Gfx10Lib::IsValidDisplaySwizzleMode
2114 *
2115 * @brief
2116 * Check if a swizzle mode is supported by display engine
2117 *
2118 * @return
2119 * TRUE is swizzle mode is supported by display engine
2120 ************************************************************************************************************************
2121 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2122 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2123 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2124 ) const
2125 {
2126 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2127
2128 return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2129 }
2130
2131 /**
2132 ************************************************************************************************************************
2133 * Gfx10Lib::GetMaxNumMipsInTail
2134 *
2135 * @brief
2136 * Return max number of mips in tails
2137 *
2138 * @return
2139 * Max number of mips in tails
2140 ************************************************************************************************************************
2141 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2142 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2143 UINT_32 blockSizeLog2, ///< block size log2
2144 BOOL_32 isThin ///< is thin or thick
2145 ) const
2146 {
2147 UINT_32 effectiveLog2 = blockSizeLog2;
2148
2149 if (isThin == FALSE)
2150 {
2151 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2152 }
2153
2154 return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2155 }
2156
2157 /**
2158 ************************************************************************************************************************
2159 * Gfx10Lib::HwlComputePipeBankXor
2160 *
2161 * @brief
2162 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2163 *
2164 * @return
2165 * PipeBankXor value
2166 ************************************************************************************************************************
2167 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2168 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2169 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2170 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2171 ) const
2172 {
2173 if (IsNonPrtXor(pIn->swizzleMode))
2174 {
2175 const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2176
2177 // No pipe xor...
2178 const UINT_32 pipeXor = 0;
2179 UINT_32 bankXor = 0;
2180
2181 const UINT_32 XorPatternLen = 8;
2182 static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};
2183 static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};
2184 static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};
2185 static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};
2186 static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2187
2188 switch (bankBits)
2189 {
2190 case 1:
2191 case 2:
2192 case 3:
2193 case 4:
2194 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2195 break;
2196 default:
2197 // valid bank bits should be 0~4
2198 ADDR_ASSERT_ALWAYS();
2199 case 0:
2200 break;
2201 }
2202
2203 pOut->pipeBankXor = bankXor | pipeXor;
2204 }
2205 else
2206 {
2207 pOut->pipeBankXor = 0;
2208 }
2209
2210 return ADDR_OK;
2211 }
2212
2213 /**
2214 ************************************************************************************************************************
2215 * Gfx10Lib::HwlComputeSlicePipeBankXor
2216 *
2217 * @brief
2218 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2219 *
2220 * @return
2221 * PipeBankXor value
2222 ************************************************************************************************************************
2223 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2224 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2225 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
2226 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
2227 ) const
2228 {
2229 if (IsNonPrtXor(pIn->swizzleMode))
2230 {
2231 const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2232 const UINT_32 pipeBits = GetPipeXorBits(blockBits);
2233 const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
2234
2235 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2236
2237 if (pIn->bpe != 0)
2238 {
2239 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2240 pIn->resourceType,
2241 Log2(pIn->bpe >> 3),
2242 1);
2243
2244 if (pPatInfo != NULL)
2245 {
2246 ADDR_BIT_SETTING fullSwizzlePattern[20];
2247 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2248
2249 const UINT_32 pipeBankXorOffset =
2250 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2251 blockBits,
2252 0,
2253 0,
2254 pIn->slice,
2255 0);
2256
2257 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2258
2259 // Should have no bit set under pipe interleave
2260 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2261
2262 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2263 }
2264 }
2265 }
2266 else
2267 {
2268 pOut->pipeBankXor = 0;
2269 }
2270
2271 return ADDR_OK;
2272 }
2273
2274 /**
2275 ************************************************************************************************************************
2276 * Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2277 *
2278 * @brief
2279 * Compute sub resource offset to support swizzle pattern
2280 *
2281 * @return
2282 * Offset
2283 ************************************************************************************************************************
2284 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2285 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2286 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
2287 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
2288 ) const
2289 {
2290 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2291
2292 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2293
2294 return ADDR_OK;
2295 }
2296
2297 /**
2298 ************************************************************************************************************************
2299 * Gfx10Lib::HwlComputeNonBlockCompressedView
2300 *
2301 * @brief
2302 * Compute non-block-compressed view for a given mipmap level/slice.
2303 *
2304 * @return
2305 * ADDR_E_RETURNCODE
2306 ************************************************************************************************************************
2307 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2308 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2309 const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
2310 ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
2311 ) const
2312 {
2313 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2314
2315 if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
2316 {
2317 // Only thin swizzle mode can have a NonBC view...
2318 returnCode = ADDR_INVALIDPARAMS;
2319 }
2320 else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
2321 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2322 {
2323 // Only support BC1~BC7, ASTC, or ETC2 for now...
2324 returnCode = ADDR_NOTSUPPORTED;
2325 }
2326 else
2327 {
2328 UINT_32 bcWidth, bcHeight;
2329 UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2330
2331 ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2332 infoIn.flags = pIn->flags;
2333 infoIn.swizzleMode = pIn->swizzleMode;
2334 infoIn.resourceType = pIn->resourceType;
2335 infoIn.bpp = bpp;
2336 infoIn.width = RoundUpQuotient(pIn->width, bcWidth);
2337 infoIn.height = RoundUpQuotient(pIn->height, bcHeight);
2338 infoIn.numSlices = pIn->numSlices;
2339 infoIn.numMipLevels = pIn->numMipLevels;
2340 infoIn.numSamples = 1;
2341 infoIn.numFrags = 1;
2342
2343 ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2344 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
2345
2346 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2347 infoOut.pMipInfo = mipInfo;
2348
2349 const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2350
2351 if (tiled)
2352 {
2353 returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2354 }
2355 else
2356 {
2357 returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2358 }
2359
2360 if (returnCode == ADDR_OK)
2361 {
2362 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2363 subOffIn.swizzleMode = infoIn.swizzleMode;
2364 subOffIn.resourceType = infoIn.resourceType;
2365 subOffIn.slice = pIn->slice;
2366 subOffIn.sliceSize = infoOut.sliceSize;
2367 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2368 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
2369
2370 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2371
2372 // For any mipmap level, move nonBc view base address by offset
2373 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2374 pOut->offset = subOffOut.offset;
2375
2376 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2377 slicePbXorIn.bpe = infoIn.bpp;
2378 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
2379 slicePbXorIn.resourceType = infoIn.resourceType;
2380 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2381 slicePbXorIn.slice = pIn->slice;
2382
2383 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2384
2385 // For any mipmap level, nonBc view should use computed pbXor
2386 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2387 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2388
2389 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2390 const UINT_32 requestMipWidth = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2391 const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2392
2393 if (inTail)
2394 {
2395 // For mipmap level that is in mip tail block, hack a lot of things...
2396 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2397 // are fit in tail block:
2398
2399 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2400 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2401
2402 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2403 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2404
2405 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2406 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2407
2408 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2409 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2410 }
2411 // This check should cover at least mipId == 0
2412 else if (requestMipWidth << pIn->mipId == infoIn.width)
2413 {
2414 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2415 // - only one mipmap level and mipId = 0
2416 pOut->mipId = 0;
2417 pOut->numMipLevels = 1;
2418
2419 // (mip0) width = requestMipWidth
2420 pOut->unalignedWidth = requestMipWidth;
2421
2422 // (mip0) height = requestMipHeight
2423 pOut->unalignedHeight = requestMipHeight;
2424 }
2425 else
2426 {
2427 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2428 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2429 // because single mip view may have different pitch value than original (multiple) mip view...
2430 // A simple case would be:
2431 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2432 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2433 // mip0 width = 0x101/mip1 width = 0x80
2434 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2435 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2436
2437 // - 2 levels and mipId = 1
2438 pOut->mipId = 1;
2439 pOut->numMipLevels = 2;
2440
2441 const UINT_32 upperMipWidth = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2442 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2443
2444 const BOOL_32 needToAvoidInTail =
2445 tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2446 TRUE : FALSE;
2447
2448 const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2449 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2450
2451 const BOOL_32 needExtraWidth =
2452 ((upperMipWidth < requestMipWidth * 2) ||
2453 ((upperMipWidth == requestMipWidth * 2) &&
2454 ((needToAvoidInTail == TRUE) ||
2455 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2456
2457 const BOOL_32 needExtraHeight =
2458 ((upperMipHeight < requestMipHeight * 2) ||
2459 ((upperMipHeight == requestMipHeight * 2) &&
2460 ((needToAvoidInTail == TRUE) ||
2461 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2462
2463 // (mip0) width = requestLastMipLevelWidth
2464 pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);
2465
2466 // (mip0) height = requestLastMipLevelHeight
2467 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2468 }
2469
2470 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2471 ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2472 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2473 ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2474 }
2475 }
2476
2477 return returnCode;
2478 }
2479
2480 /**
2481 ************************************************************************************************************************
2482 * Gfx10Lib::ValidateNonSwModeParams
2483 *
2484 * @brief
2485 * Validate compute surface info params except swizzle mode
2486 *
2487 * @return
2488 * TRUE if parameters are valid, FALSE otherwise
2489 ************************************************************************************************************************
2490 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2491 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2492 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2493 {
2494 BOOL_32 valid = TRUE;
2495
2496 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2497 {
2498 ADDR_ASSERT_ALWAYS();
2499 valid = FALSE;
2500 }
2501
2502 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2503 {
2504 ADDR_ASSERT_ALWAYS();
2505 valid = FALSE;
2506 }
2507
2508 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2509 const AddrResourceType rsrcType = pIn->resourceType;
2510 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
2511 const BOOL_32 msaa = (pIn->numFrags > 1);
2512 const BOOL_32 display = flags.display;
2513 const BOOL_32 tex3d = IsTex3d(rsrcType);
2514 const BOOL_32 tex2d = IsTex2d(rsrcType);
2515 const BOOL_32 tex1d = IsTex1d(rsrcType);
2516 const BOOL_32 stereo = flags.qbStereo;
2517
2518 // Resource type check
2519 if (tex1d)
2520 {
2521 if (msaa || display || stereo)
2522 {
2523 ADDR_ASSERT_ALWAYS();
2524 valid = FALSE;
2525 }
2526 }
2527 else if (tex2d)
2528 {
2529 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2530 {
2531 ADDR_ASSERT_ALWAYS();
2532 valid = FALSE;
2533 }
2534 }
2535 else if (tex3d)
2536 {
2537 if (msaa || display || stereo)
2538 {
2539 ADDR_ASSERT_ALWAYS();
2540 valid = FALSE;
2541 }
2542 }
2543 else
2544 {
2545 ADDR_ASSERT_ALWAYS();
2546 valid = FALSE;
2547 }
2548
2549 return valid;
2550 }
2551
2552 /**
2553 ************************************************************************************************************************
2554 * Gfx10Lib::ValidateSwModeParams
2555 *
2556 * @brief
2557 * Validate compute surface info related to swizzle mode
2558 *
2559 * @return
2560 * TRUE if parameters are valid, FALSE otherwise
2561 ************************************************************************************************************************
2562 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2563 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2564 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2565 {
2566 BOOL_32 valid = TRUE;
2567
2568 if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2569 {
2570 ADDR_ASSERT_ALWAYS();
2571 valid = FALSE;
2572 }
2573 else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2574 {
2575 {
2576 ADDR_ASSERT_ALWAYS();
2577 valid = FALSE;
2578 }
2579 }
2580
2581 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
2582 const AddrResourceType rsrcType = pIn->resourceType;
2583 const AddrSwizzleMode swizzle = pIn->swizzleMode;
2584 const BOOL_32 msaa = (pIn->numFrags > 1);
2585 const BOOL_32 zbuffer = flags.depth || flags.stencil;
2586 const BOOL_32 color = flags.color;
2587 const BOOL_32 display = flags.display;
2588 const BOOL_32 tex3d = IsTex3d(rsrcType);
2589 const BOOL_32 tex2d = IsTex2d(rsrcType);
2590 const BOOL_32 tex1d = IsTex1d(rsrcType);
2591 const BOOL_32 thin3d = flags.view3dAs2dArray;
2592 const BOOL_32 linear = IsLinear(swizzle);
2593 const BOOL_32 blk256B = IsBlock256b(swizzle);
2594 const BOOL_32 blkVar = IsBlockVariable(swizzle);
2595 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
2596 const BOOL_32 prt = flags.prt;
2597 const BOOL_32 fmask = flags.fmask;
2598
2599 // Misc check
2600 if ((pIn->numFrags > 1) &&
2601 (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2602 {
2603 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2604 ADDR_ASSERT_ALWAYS();
2605 valid = FALSE;
2606 }
2607
2608 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2609 {
2610 ADDR_ASSERT_ALWAYS();
2611 valid = FALSE;
2612 }
2613
2614 if ((pIn->bpp == 96) && (linear == FALSE))
2615 {
2616 ADDR_ASSERT_ALWAYS();
2617 valid = FALSE;
2618 }
2619
2620 const UINT_32 swizzleMask = 1 << swizzle;
2621
2622 // Resource type check
2623 if (tex1d)
2624 {
2625 if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2626 {
2627 ADDR_ASSERT_ALWAYS();
2628 valid = FALSE;
2629 }
2630 }
2631 else if (tex2d)
2632 {
2633 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2634 {
2635 {
2636 ADDR_ASSERT_ALWAYS();
2637 valid = FALSE;
2638 }
2639 }
2640 else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2641 (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2642 {
2643 ADDR_ASSERT_ALWAYS();
2644 valid = FALSE;
2645 }
2646 }
2647 else if (tex3d)
2648 {
2649 if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2650 (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2651 (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0)))
2652 {
2653 ADDR_ASSERT_ALWAYS();
2654 valid = FALSE;
2655 }
2656 }
2657
2658 // Swizzle type check
2659 if (linear)
2660 {
2661 if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2662 {
2663 ADDR_ASSERT_ALWAYS();
2664 valid = FALSE;
2665 }
2666 }
2667 else if (IsZOrderSwizzle(swizzle))
2668 {
2669 if ((pIn->bpp > 64) ||
2670 (msaa && (color || (pIn->bpp > 32))) ||
2671 ElemLib::IsBlockCompressed(pIn->format) ||
2672 ElemLib::IsMacroPixelPacked(pIn->format))
2673 {
2674 ADDR_ASSERT_ALWAYS();
2675 valid = FALSE;
2676 }
2677 }
2678 else if (IsStandardSwizzle(rsrcType, swizzle))
2679 {
2680 if (zbuffer || msaa)
2681 {
2682 ADDR_ASSERT_ALWAYS();
2683 valid = FALSE;
2684 }
2685 }
2686 else if (IsDisplaySwizzle(rsrcType, swizzle))
2687 {
2688 if (zbuffer || msaa)
2689 {
2690 ADDR_ASSERT_ALWAYS();
2691 valid = FALSE;
2692 }
2693 }
2694 else if (IsRtOptSwizzle(swizzle))
2695 {
2696 if (zbuffer)
2697 {
2698 ADDR_ASSERT_ALWAYS();
2699 valid = FALSE;
2700 }
2701 }
2702 else
2703 {
2704 {
2705 ADDR_ASSERT_ALWAYS();
2706 valid = FALSE;
2707 }
2708 }
2709
2710 // Block type check
2711 if (blk256B)
2712 {
2713 if (zbuffer || tex3d || msaa)
2714 {
2715 ADDR_ASSERT_ALWAYS();
2716 valid = FALSE;
2717 }
2718 }
2719 else if (blkVar)
2720 {
2721 if (m_blockVarSizeLog2 == 0)
2722 {
2723 ADDR_ASSERT_ALWAYS();
2724 valid = FALSE;
2725 }
2726 }
2727
2728 return valid;
2729 }
2730
2731 /**
2732 ************************************************************************************************************************
2733 * Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2734 *
2735 * @brief
2736 * Compute surface info sanity check
2737 *
2738 * @return
2739 * Offset
2740 ************************************************************************************************************************
2741 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2742 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2743 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
2744 ) const
2745 {
2746 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2747 }
2748
2749 /**
2750 ************************************************************************************************************************
2751 * Gfx10Lib::HwlGetPreferredSurfaceSetting
2752 *
2753 * @brief
2754 * Internal function to get suggested surface information for client to use
2755 *
2756 * @return
2757 * ADDR_E_RETURNCODE
2758 ************************************************************************************************************************
2759 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2760 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2761 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2762 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2763 ) const
2764 {
2765 ADDR_E_RETURNCODE returnCode = ADDR_OK;
2766
2767 if (pIn->flags.fmask)
2768 {
2769 const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2770 const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2771
2772 if (forbid64KbBlockType && forbidVarBlockType)
2773 {
2774 // Invalid combination...
2775 ADDR_ASSERT_ALWAYS();
2776 returnCode = ADDR_INVALIDPARAMS;
2777 }
2778 else
2779 {
2780 pOut->resourceType = ADDR_RSRC_TEX_2D;
2781 pOut->validBlockSet.value = 0;
2782 pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2783 pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2784 pOut->validSwModeSet.value = 0;
2785 pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2786 pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2787 pOut->canXor = TRUE;
2788 pOut->validSwTypeSet.value = AddrSwSetZ;
2789 pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2790
2791 BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2792
2793 if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2794 {
2795 const UINT_8 maxFmaskSwizzleModeType = 2;
2796 const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2797 const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2798 const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2799 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2800 const UINT_32 width = Max(pIn->width, 1u);
2801 const UINT_32 height = Max(pIn->height, 1u);
2802 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2803
2804 AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2805 Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2806 Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2807 UINT_64 padSize[maxFmaskSwizzleModeType] = {};
2808
2809 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2810 {
2811 ComputeBlockDimensionForSurf(&blkDim[i].w,
2812 &blkDim[i].h,
2813 &blkDim[i].d,
2814 fmaskBpp,
2815 1,
2816 pOut->resourceType,
2817 swMode[i]);
2818
2819 padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2820 padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2821 }
2822
2823 if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
2824 padSize[1],
2825 ratioLow,
2826 ratioHi,
2827 pIn->memoryBudget,
2828 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2829 {
2830 use64KbBlockType = FALSE;
2831 }
2832 }
2833 else if (forbidVarBlockType)
2834 {
2835 use64KbBlockType = TRUE;
2836 }
2837
2838 if (use64KbBlockType)
2839 {
2840 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2841 }
2842 else
2843 {
2844 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2845 }
2846 }
2847 }
2848 else
2849 {
2850 UINT_32 bpp = pIn->bpp;
2851 UINT_32 width = Max(pIn->width, 1u);
2852 UINT_32 height = Max(pIn->height, 1u);
2853
2854 // Set format to INVALID will skip this conversion
2855 if (pIn->format != ADDR_FMT_INVALID)
2856 {
2857 ElemMode elemMode = ADDR_UNCOMPRESSED;
2858 UINT_32 expandX, expandY;
2859
2860 // Get compression/expansion factors and element mode which indicates compression/expansion
2861 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2862 &elemMode,
2863 &expandX,
2864 &expandY);
2865
2866 UINT_32 basePitch = 0;
2867 GetElemLib()->AdjustSurfaceInfo(elemMode,
2868 expandX,
2869 expandY,
2870 &bpp,
2871 &basePitch,
2872 &width,
2873 &height);
2874 }
2875
2876 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2877 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2878 const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2879 const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2880 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2881
2882 // Pre sanity check on non swizzle mode parameters
2883 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2884 localIn.flags = pIn->flags;
2885 localIn.resourceType = pIn->resourceType;
2886 localIn.format = pIn->format;
2887 localIn.bpp = bpp;
2888 localIn.width = width;
2889 localIn.height = height;
2890 localIn.numSlices = numSlices;
2891 localIn.numMipLevels = numMipLevels;
2892 localIn.numSamples = numSamples;
2893 localIn.numFrags = numFrags;
2894
2895 if (ValidateNonSwModeParams(&localIn))
2896 {
2897 // Forbid swizzle mode(s) by client setting
2898 ADDR2_SWMODE_SET allowedSwModeSet = {};
2899 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2900 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2901 allowedSwModeSet.value |=
2902 pIn->forbiddenBlock.macroThin4KB ? 0 :
2903 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2904 allowedSwModeSet.value |=
2905 pIn->forbiddenBlock.macroThick4KB ? 0 :
2906 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2907 allowedSwModeSet.value |=
2908 pIn->forbiddenBlock.macroThin64KB ? 0 :
2909 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2910 allowedSwModeSet.value |=
2911 pIn->forbiddenBlock.macroThick64KB ? 0 :
2912 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2913 allowedSwModeSet.value |=
2914 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2915
2916 if (pIn->preferredSwSet.value != 0)
2917 {
2918 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2919 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2920 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2921 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2922 }
2923
2924 if (pIn->noXor)
2925 {
2926 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2927 }
2928
2929 if (pIn->maxAlign > 0)
2930 {
2931 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2932 {
2933 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2934 }
2935
2936 if (pIn->maxAlign < Size64K)
2937 {
2938 allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2939 }
2940
2941 if (pIn->maxAlign < Size4K)
2942 {
2943 allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2944 }
2945
2946 if (pIn->maxAlign < Size256)
2947 {
2948 allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2949 }
2950 }
2951
2952 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2953 switch (pIn->resourceType)
2954 {
2955 case ADDR_RSRC_TEX_1D:
2956 allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2957 break;
2958
2959 case ADDR_RSRC_TEX_2D:
2960 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2961 break;
2962
2963 case ADDR_RSRC_TEX_3D:
2964 allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2965
2966 if (pIn->flags.view3dAs2dArray)
2967 {
2968 // SW_LINEAR can be used for 3D thin images, including BCn image format.
2969 allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
2970 }
2971 break;
2972
2973 default:
2974 ADDR_ASSERT_ALWAYS();
2975 allowedSwModeSet.value = 0;
2976 break;
2977 }
2978
2979 if (ElemLib::IsBlockCompressed(pIn->format) ||
2980 ElemLib::IsMacroPixelPacked(pIn->format) ||
2981 (bpp > 64) ||
2982 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2983 {
2984 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2985 }
2986
2987 if (pIn->format == ADDR_FMT_32_32_32)
2988 {
2989 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2990 }
2991
2992 if (msaa)
2993 {
2994 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2995 }
2996
2997 if (pIn->flags.depth || pIn->flags.stencil)
2998 {
2999 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3000 }
3001
3002 if (pIn->flags.display)
3003 {
3004 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3005 }
3006
3007 if (allowedSwModeSet.value != 0)
3008 {
3009 #if DEBUG
3010 // Post sanity check, at least AddrLib should accept the output generated by its own
3011 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3012
3013 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3014 {
3015 if (validateSwModeSet & 1)
3016 {
3017 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3018 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3019 }
3020
3021 validateSwModeSet >>= 1;
3022 }
3023 #endif
3024
3025 pOut->resourceType = pIn->resourceType;
3026 pOut->validSwModeSet = allowedSwModeSet;
3027 pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3028 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3029 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3030
3031 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3032
3033 if (pOut->clientPreferredSwSet.value == 0)
3034 {
3035 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3036 }
3037
3038 // Apply optional restrictions
3039 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3040 {
3041 if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3042 {
3043 // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3044 // the GL2 in VAR mode, so it should be avoided.
3045 allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3046 }
3047 else
3048 {
3049 // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3050 // But we have to suffer from low performance because there is no other choice...
3051 ADDR_ASSERT_ALWAYS();
3052 }
3053 }
3054
3055 if (pIn->flags.needEquation)
3056 {
3057 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
3058 ADDR_MAX_LEGACY_EQUATION_COMP;
3059 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3060 }
3061
3062 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3063 {
3064 pOut->swizzleMode = ADDR_SW_LINEAR;
3065 }
3066 else
3067 {
3068 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3069
3070 if ((height > 1) && (computeMinSize == FALSE))
3071 {
3072 // Always ignore linear swizzle mode if:
3073 // 1. This is a (2D/3D) resource with height > 1
3074 // 2. Client doesn't require computing minimize size
3075 allowedSwModeSet.swLinear = 0;
3076 }
3077
3078 // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
3079 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3080
3081 // Determine block size if there are 2 or more block type candidates
3082 if (IsPow2(allowedBlockSet.value) == FALSE)
3083 {
3084 // Tracks a valid SwizzleMode for each valid block type
3085 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3086
3087 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3088
3089 if (m_blockVarSizeLog2 != 0)
3090 {
3091 swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3092 }
3093
3094 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3095 {
3096 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3097 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
3098 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3099 }
3100 else
3101 {
3102 swMode[AddrBlockMicro] = ADDR_SW_256B_S;
3103 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
3104 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3105 }
3106
3107 // Tracks the size of each valid swizzle mode's surface in bytes
3108 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3109
3110 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3111 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3112 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3113 UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use
3114 UINT_64 minSize = 0; // Tracks the minimum acceptable block type
3115
3116 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3117
3118 // Iterate through all block types
3119 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3120 {
3121 if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3122 {
3123 localIn.swizzleMode = swMode[i];
3124
3125 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3126 {
3127 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3128 }
3129 else
3130 {
3131 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3132 }
3133
3134 if (returnCode == ADDR_OK)
3135 {
3136 padSize[i] = localOut.surfSize;
3137
3138 if (minSize == 0)
3139 {
3140 minSize = padSize[i];
3141 minSizeBlk = i;
3142 }
3143 else
3144 {
3145 // Checks if the block type is within the memory budget but favors larger blocks
3146 if (Addr2BlockTypeWithinMemoryBudget(
3147 minSize,
3148 padSize[i],
3149 ratioLow,
3150 ratioHi,
3151 0.0,
3152 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3153 {
3154 minSize = padSize[i];
3155 minSizeBlk = i;
3156 }
3157 }
3158 }
3159 else
3160 {
3161 ADDR_ASSERT_ALWAYS();
3162 break;
3163 }
3164 }
3165 }
3166
3167 if (pIn->memoryBudget > 1.0)
3168 {
3169 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3170 // smaller-block type again in coming loop
3171 switch (minSizeBlk)
3172 {
3173 case AddrBlockThick64KB:
3174 allowedBlockSet.macroThin64KB = 0;
3175 case AddrBlockThinVar:
3176 case AddrBlockThin64KB:
3177 allowedBlockSet.macroThick4KB = 0;
3178 case AddrBlockThick4KB:
3179 allowedBlockSet.macroThin4KB = 0;
3180 case AddrBlockThin4KB:
3181 allowedBlockSet.micro = 0;
3182 case AddrBlockMicro:
3183 allowedBlockSet.linear = 0;
3184 case AddrBlockLinear:
3185 break;
3186
3187 default:
3188 ADDR_ASSERT_ALWAYS();
3189 break;
3190 }
3191
3192 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3193 {
3194 if ((i != minSizeBlk) &&
3195 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3196 {
3197 if (Addr2BlockTypeWithinMemoryBudget(
3198 minSize,
3199 padSize[i],
3200 0,
3201 0,
3202 pIn->memoryBudget,
3203 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3204 {
3205 // Clear the block type if the memory waste is unacceptable
3206 allowedBlockSet.value &= ~(1u << (i - 1));
3207 }
3208 }
3209 }
3210
3211 // Remove VAR block type if bigger block type is allowed
3212 if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3213 {
3214 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3215 {
3216 allowedBlockSet.var = 0;
3217 }
3218 }
3219
3220 // Remove linear block type if 2 or more block types are allowed
3221 if (IsPow2(allowedBlockSet.value) == FALSE)
3222 {
3223 allowedBlockSet.linear = 0;
3224 }
3225
3226 // Select the biggest allowed block type
3227 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3228
3229 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3230 {
3231 minSizeBlk = AddrBlockLinear;
3232 }
3233 }
3234
3235 switch (minSizeBlk)
3236 {
3237 case AddrBlockLinear:
3238 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3239 break;
3240
3241 case AddrBlockMicro:
3242 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3243 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3244 break;
3245
3246 case AddrBlockThin4KB:
3247 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3248 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3249 break;
3250
3251 case AddrBlockThick4KB:
3252 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3253 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3254 break;
3255
3256 case AddrBlockThin64KB:
3257 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3258 Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3259 break;
3260
3261 case AddrBlockThick64KB:
3262 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3263 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3264 break;
3265
3266 case AddrBlockThinVar:
3267 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3268 break;
3269
3270 default:
3271 ADDR_ASSERT_ALWAYS();
3272 allowedSwModeSet.value = 0;
3273 break;
3274 }
3275 }
3276
3277 // Block type should be determined.
3278 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3279
3280 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3281
3282 // Determine swizzle type if there are 2 or more swizzle type candidates
3283 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3284 {
3285 if (ElemLib::IsBlockCompressed(pIn->format))
3286 {
3287 if (allowedSwSet.sw_D)
3288 {
3289 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3290 }
3291 else if (allowedSwSet.sw_S)
3292 {
3293 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3294 }
3295 else
3296 {
3297 ADDR_ASSERT(allowedSwSet.sw_R);
3298 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3299 }
3300 }
3301 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3302 {
3303 if (allowedSwSet.sw_S)
3304 {
3305 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3306 }
3307 else if (allowedSwSet.sw_D)
3308 {
3309 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3310 }
3311 else
3312 {
3313 ADDR_ASSERT(allowedSwSet.sw_R);
3314 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3315 }
3316 }
3317 else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3318 {
3319 if (pIn->flags.color &&
3320 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3321 allowedSwSet.sw_D)
3322 {
3323 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3324 }
3325 else if (allowedSwSet.sw_S)
3326 {
3327 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3328 }
3329 else if (allowedSwSet.sw_R)
3330 {
3331 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3332 }
3333 else
3334 {
3335 ADDR_ASSERT(allowedSwSet.sw_Z);
3336 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3337 }
3338 }
3339 else
3340 {
3341 if (allowedSwSet.sw_R)
3342 {
3343 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3344 }
3345 else if (allowedSwSet.sw_D)
3346 {
3347 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3348 }
3349 else if (allowedSwSet.sw_S)
3350 {
3351 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3352 }
3353 else
3354 {
3355 ADDR_ASSERT(allowedSwSet.sw_Z);
3356 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3357 }
3358 }
3359
3360 // Swizzle type should be determined.
3361 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3362 }
3363
3364 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3365 // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3366 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3367 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3368 }
3369 }
3370 else
3371 {
3372 // Invalid combination...
3373 ADDR_ASSERT_ALWAYS();
3374 returnCode = ADDR_INVALIDPARAMS;
3375 }
3376 }
3377 else
3378 {
3379 // Invalid combination...
3380 ADDR_ASSERT_ALWAYS();
3381 returnCode = ADDR_INVALIDPARAMS;
3382 }
3383 }
3384
3385 return returnCode;
3386 }
3387
3388 /**
3389 ************************************************************************************************************************
3390 * Gfx10Lib::ComputeStereoInfo
3391 *
3392 * @brief
3393 * Compute height alignment and right eye pipeBankXor for stereo surface
3394 *
3395 * @return
3396 * Error code
3397 *
3398 ************************************************************************************************************************
3399 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3400 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3401 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
3402 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
3403 UINT_32* pRightXor ///< Right eye xor
3404 ) const
3405 {
3406 ADDR_E_RETURNCODE ret = ADDR_OK;
3407
3408 *pRightXor = 0;
3409
3410 if (IsNonPrtXor(pIn->swizzleMode))
3411 {
3412 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3413 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
3414 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
3415 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
3416 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
3417
3418 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3419 {
3420 UINT_32 yMax = 0;
3421 UINT_32 yPosMask = 0;
3422
3423 // First get "max y bit"
3424 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3425 {
3426 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3427
3428 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3429 (m_equationTable[eqIndex].addr[i].index > yMax))
3430 {
3431 yMax = m_equationTable[eqIndex].addr[i].index;
3432 }
3433
3434 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3435 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3436 (m_equationTable[eqIndex].xor1[i].index > yMax))
3437 {
3438 yMax = m_equationTable[eqIndex].xor1[i].index;
3439 }
3440
3441 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3442 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3443 (m_equationTable[eqIndex].xor2[i].index > yMax))
3444 {
3445 yMax = m_equationTable[eqIndex].xor2[i].index;
3446 }
3447 }
3448
3449 // Then loop again for populating a position mask of "max Y bit"
3450 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3451 {
3452 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3453 (m_equationTable[eqIndex].addr[i].index == yMax))
3454 {
3455 yPosMask |= 1u << i;
3456 }
3457 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3458 (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3459 (m_equationTable[eqIndex].xor1[i].index == yMax))
3460 {
3461 yPosMask |= 1u << i;
3462 }
3463 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3464 (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3465 (m_equationTable[eqIndex].xor2[i].index == yMax))
3466 {
3467 yPosMask |= 1u << i;
3468 }
3469 }
3470
3471 const UINT_32 additionalAlign = 1 << yMax;
3472
3473 if (additionalAlign >= *pAlignY)
3474 {
3475 *pAlignY = additionalAlign;
3476
3477 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3478
3479 if ((alignedHeight >> yMax) & 1)
3480 {
3481 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3482 }
3483 }
3484 }
3485 else
3486 {
3487 ret = ADDR_INVALIDPARAMS;
3488 }
3489 }
3490
3491 return ret;
3492 }
3493
3494 /**
3495 ************************************************************************************************************************
3496 * Gfx10Lib::HwlComputeSurfaceInfoTiled
3497 *
3498 * @brief
3499 * Internal function to calculate alignment for tiled surface
3500 *
3501 * @return
3502 * ADDR_E_RETURNCODE
3503 ************************************************************************************************************************
3504 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3505 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3506 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3507 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3508 ) const
3509 {
3510 ADDR_E_RETURNCODE ret;
3511
3512 // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3513 pOut->mipChainPitch = 0;
3514 pOut->mipChainHeight = 0;
3515 pOut->mipChainSlice = 0;
3516 pOut->epitchIsHeight = FALSE;
3517
3518 // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3519 pOut->mipChainInTail = FALSE;
3520 pOut->firstMipIdInTail = pIn->numMipLevels;
3521
3522 if (IsBlock256b(pIn->swizzleMode))
3523 {
3524 ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3525 }
3526 else
3527 {
3528 ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3529 }
3530
3531 return ret;
3532 }
3533
3534 /**
3535 ************************************************************************************************************************
3536 * Gfx10Lib::ComputeSurfaceInfoMicroTiled
3537 *
3538 * @brief
3539 * Internal function to calculate alignment for micro tiled surface
3540 *
3541 * @return
3542 * ADDR_E_RETURNCODE
3543 ************************************************************************************************************************
3544 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3545 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3546 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3547 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3548 ) const
3549 {
3550 ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3551 &pOut->blockHeight,
3552 &pOut->blockSlices,
3553 pIn->bpp,
3554 pIn->numFrags,
3555 pIn->resourceType,
3556 pIn->swizzleMode);
3557
3558 if (ret == ADDR_OK)
3559 {
3560 const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3561
3562 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3563 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
3564 pOut->numSlices = pIn->numSlices;
3565 pOut->baseAlign = blockSize;
3566
3567 if (pIn->numMipLevels > 1)
3568 {
3569 const UINT_32 mip0Width = pIn->width;
3570 const UINT_32 mip0Height = pIn->height;
3571 UINT_64 mipSliceSize = 0;
3572
3573 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3574 {
3575 UINT_32 mipWidth, mipHeight;
3576
3577 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3578
3579 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);
3580 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3581
3582 if (pOut->pMipInfo != NULL)
3583 {
3584 pOut->pMipInfo[i].pitch = mipActualWidth;
3585 pOut->pMipInfo[i].height = mipActualHeight;
3586 pOut->pMipInfo[i].depth = 1;
3587 pOut->pMipInfo[i].offset = mipSliceSize;
3588 pOut->pMipInfo[i].mipTailOffset = 0;
3589 pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3590 }
3591
3592 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3593 }
3594
3595 pOut->sliceSize = mipSliceSize;
3596 pOut->surfSize = mipSliceSize * pOut->numSlices;
3597 }
3598 else
3599 {
3600 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3601 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3602
3603 if (pOut->pMipInfo != NULL)
3604 {
3605 pOut->pMipInfo[0].pitch = pOut->pitch;
3606 pOut->pMipInfo[0].height = pOut->height;
3607 pOut->pMipInfo[0].depth = 1;
3608 pOut->pMipInfo[0].offset = 0;
3609 pOut->pMipInfo[0].mipTailOffset = 0;
3610 pOut->pMipInfo[0].macroBlockOffset = 0;
3611 }
3612 }
3613
3614 }
3615
3616 return ret;
3617 }
3618
3619 /**
3620 ************************************************************************************************************************
3621 * Gfx10Lib::ComputeSurfaceInfoMacroTiled
3622 *
3623 * @brief
3624 * Internal function to calculate alignment for macro tiled surface
3625 *
3626 * @return
3627 * ADDR_E_RETURNCODE
3628 ************************************************************************************************************************
3629 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3630 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3631 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
3632 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
3633 ) const
3634 {
3635 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3636 &pOut->blockHeight,
3637 &pOut->blockSlices,
3638 pIn->bpp,
3639 pIn->numFrags,
3640 pIn->resourceType,
3641 pIn->swizzleMode);
3642
3643 if (returnCode == ADDR_OK)
3644 {
3645 UINT_32 heightAlign = pOut->blockHeight;
3646
3647 if (pIn->flags.qbStereo)
3648 {
3649 UINT_32 rightXor = 0;
3650
3651 returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3652
3653 if (returnCode == ADDR_OK)
3654 {
3655 pOut->pStereoInfo->rightSwizzle = rightXor;
3656 }
3657 }
3658
3659 if (returnCode == ADDR_OK)
3660 {
3661 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3662 const UINT_32 blockSize = 1 << blockSizeLog2;
3663
3664 pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);
3665 pOut->height = PowTwoAlign(pIn->height, heightAlign);
3666 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3667 pOut->baseAlign = blockSize;
3668
3669 if (pIn->numMipLevels > 1)
3670 {
3671 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
3672 pIn->swizzleMode,
3673 pOut->blockWidth,
3674 pOut->blockHeight,
3675 pOut->blockSlices);
3676 const UINT_32 mip0Width = pIn->width;
3677 const UINT_32 mip0Height = pIn->height;
3678 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
3679 const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;
3680 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3681 const UINT_32 index = Log2(pIn->bpp >> 3);
3682 UINT_32 firstMipInTail = pIn->numMipLevels;
3683 UINT_64 mipChainSliceSize = 0;
3684 UINT_64 mipSize[MaxMipLevels];
3685 UINT_64 mipSliceSize[MaxMipLevels];
3686
3687 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3688 Dim3d fixedTailMaxDim = tailMaxDim;
3689
3690 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3691 {
3692 fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3693 fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3694 }
3695
3696 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3697 {
3698 UINT_32 mipWidth, mipHeight, mipDepth;
3699
3700 GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3701
3702 if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3703 {
3704 firstMipInTail = i;
3705 mipChainSliceSize += blockSize / pOut->blockSlices;
3706 break;
3707 }
3708 else
3709 {
3710 const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);
3711 const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);
3712 const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);
3713 const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3714
3715 mipSize[i] = sliceSize * depth;
3716 mipSliceSize[i] = sliceSize * pOut->blockSlices;
3717 mipChainSliceSize += sliceSize;
3718
3719 if (pOut->pMipInfo != NULL)
3720 {
3721 pOut->pMipInfo[i].pitch = pitch;
3722 pOut->pMipInfo[i].height = height;
3723 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3724 }
3725 }
3726 }
3727
3728 pOut->sliceSize = mipChainSliceSize;
3729 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
3730 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
3731 pOut->firstMipIdInTail = firstMipInTail;
3732
3733 if (pOut->pMipInfo != NULL)
3734 {
3735 UINT_64 offset = 0;
3736 UINT_64 macroBlkOffset = 0;
3737 UINT_32 tailMaxDepth = 0;
3738
3739 if (firstMipInTail != pIn->numMipLevels)
3740 {
3741 UINT_32 mipWidth, mipHeight;
3742
3743 GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3744 &mipWidth, &mipHeight, &tailMaxDepth);
3745
3746 offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3747 macroBlkOffset = blockSize;
3748 }
3749
3750 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3751 {
3752 pOut->pMipInfo[i].offset = offset;
3753 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3754 pOut->pMipInfo[i].mipTailOffset = 0;
3755
3756 offset += mipSize[i];
3757 macroBlkOffset += mipSliceSize[i];
3758 }
3759
3760 UINT_32 pitch = tailMaxDim.w;
3761 UINT_32 height = tailMaxDim.h;
3762 UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3763
3764 tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3765
3766 for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3767 {
3768 const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);
3769 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3770
3771 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
3772 pOut->pMipInfo[i].mipTailOffset = mipOffset;
3773 pOut->pMipInfo[i].macroBlockOffset = 0;
3774
3775 pOut->pMipInfo[i].pitch = pitch;
3776 pOut->pMipInfo[i].height = height;
3777 pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3778
3779 UINT_32 mipX = ((mipOffset >> 9) & 1) |
3780 ((mipOffset >> 10) & 2) |
3781 ((mipOffset >> 11) & 4) |
3782 ((mipOffset >> 12) & 8) |
3783 ((mipOffset >> 13) & 16) |
3784 ((mipOffset >> 14) & 32);
3785 UINT_32 mipY = ((mipOffset >> 8) & 1) |
3786 ((mipOffset >> 9) & 2) |
3787 ((mipOffset >> 10) & 4) |
3788 ((mipOffset >> 11) & 8) |
3789 ((mipOffset >> 12) & 16) |
3790 ((mipOffset >> 13) & 32);
3791
3792 if (blockSizeLog2 & 1)
3793 {
3794 const UINT_32 temp = mipX;
3795 mipX = mipY;
3796 mipY = temp;
3797
3798 if (index & 1)
3799 {
3800 mipY = (mipY << 1) | (mipX & 1);
3801 mipX = mipX >> 1;
3802 }
3803 }
3804
3805 if (isThin)
3806 {
3807 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3808 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3809 pOut->pMipInfo[i].mipTailCoordZ = 0;
3810
3811 pitch = Max(pitch >> 1, Block256_2d[index].w);
3812 height = Max(height >> 1, Block256_2d[index].h);
3813 }
3814 else
3815 {
3816 pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3817 pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3818 pOut->pMipInfo[i].mipTailCoordZ = 0;
3819
3820 pitch = Max(pitch >> 1, Block256_3d[index].w);
3821 height = Max(height >> 1, Block256_3d[index].h);
3822 }
3823 }
3824 }
3825 }
3826 else
3827 {
3828 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3829 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
3830
3831 if (pOut->pMipInfo != NULL)
3832 {
3833 pOut->pMipInfo[0].pitch = pOut->pitch;
3834 pOut->pMipInfo[0].height = pOut->height;
3835 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3836 pOut->pMipInfo[0].offset = 0;
3837 pOut->pMipInfo[0].mipTailOffset = 0;
3838 pOut->pMipInfo[0].macroBlockOffset = 0;
3839 pOut->pMipInfo[0].mipTailCoordX = 0;
3840 pOut->pMipInfo[0].mipTailCoordY = 0;
3841 pOut->pMipInfo[0].mipTailCoordZ = 0;
3842 }
3843 }
3844 }
3845 }
3846
3847 return returnCode;
3848 }
3849
3850 /**
3851 ************************************************************************************************************************
3852 * Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3853 *
3854 * @brief
3855 * Internal function to calculate address from coord for tiled swizzle surface
3856 *
3857 * @return
3858 * ADDR_E_RETURNCODE
3859 ************************************************************************************************************************
3860 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3861 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3862 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
3863 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
3864 ) const
3865 {
3866 ADDR_E_RETURNCODE ret;
3867
3868 if (IsBlock256b(pIn->swizzleMode))
3869 {
3870 ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3871 }
3872 else
3873 {
3874 ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3875 }
3876
3877 return ret;
3878 }
3879
3880 /**
3881 ************************************************************************************************************************
3882 * Gfx10Lib::ComputeOffsetFromEquation
3883 *
3884 * @brief
3885 * Compute offset from equation
3886 *
3887 * @return
3888 * Offset
3889 ************************************************************************************************************************
3890 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3891 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3892 const ADDR_EQUATION* pEq, ///< Equation
3893 UINT_32 x, ///< x coord in bytes
3894 UINT_32 y, ///< y coord in pixel
3895 UINT_32 z ///< z coord in slice
3896 ) const
3897 {
3898 UINT_32 offset = 0;
3899
3900 for (UINT_32 i = 0; i < pEq->numBits; i++)
3901 {
3902 UINT_32 v = 0;
3903
3904 for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3905 {
3906 if (pEq->comps[c][i].valid)
3907 {
3908 if (pEq->comps[c][i].channel == 0)
3909 {
3910 v ^= (x >> pEq->comps[c][i].index) & 1;
3911 }
3912 else if (pEq->comps[c][i].channel == 1)
3913 {
3914 v ^= (y >> pEq->comps[c][i].index) & 1;
3915 }
3916 else
3917 {
3918 ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3919 v ^= (z >> pEq->comps[c][i].index) & 1;
3920 }
3921 }
3922 }
3923
3924 offset |= (v << i);
3925 }
3926
3927 return offset;
3928 }
3929
3930 /**
3931 ************************************************************************************************************************
3932 * Gfx10Lib::ComputeOffsetFromSwizzlePattern
3933 *
3934 * @brief
3935 * Compute offset from swizzle pattern
3936 *
3937 * @return
3938 * Offset
3939 ************************************************************************************************************************
3940 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3941 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3942 const UINT_64* pPattern, ///< Swizzle pattern
3943 UINT_32 numBits, ///< Number of bits in pattern
3944 UINT_32 x, ///< x coord in pixel
3945 UINT_32 y, ///< y coord in pixel
3946 UINT_32 z, ///< z coord in slice
3947 UINT_32 s ///< sample id
3948 ) const
3949 {
3950 UINT_32 offset = 0;
3951 const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3952
3953 for (UINT_32 i = 0; i < numBits; i++)
3954 {
3955 UINT_32 v = 0;
3956
3957 if (pSwizzlePattern[i].x != 0)
3958 {
3959 UINT_16 mask = pSwizzlePattern[i].x;
3960 UINT_32 xBits = x;
3961
3962 while (mask != 0)
3963 {
3964 if (mask & 1)
3965 {
3966 v ^= xBits & 1;
3967 }
3968
3969 xBits >>= 1;
3970 mask >>= 1;
3971 }
3972 }
3973
3974 if (pSwizzlePattern[i].y != 0)
3975 {
3976 UINT_16 mask = pSwizzlePattern[i].y;
3977 UINT_32 yBits = y;
3978
3979 while (mask != 0)
3980 {
3981 if (mask & 1)
3982 {
3983 v ^= yBits & 1;
3984 }
3985
3986 yBits >>= 1;
3987 mask >>= 1;
3988 }
3989 }
3990
3991 if (pSwizzlePattern[i].z != 0)
3992 {
3993 UINT_16 mask = pSwizzlePattern[i].z;
3994 UINT_32 zBits = z;
3995
3996 while (mask != 0)
3997 {
3998 if (mask & 1)
3999 {
4000 v ^= zBits & 1;
4001 }
4002
4003 zBits >>= 1;
4004 mask >>= 1;
4005 }
4006 }
4007
4008 if (pSwizzlePattern[i].s != 0)
4009 {
4010 UINT_16 mask = pSwizzlePattern[i].s;
4011 UINT_32 sBits = s;
4012
4013 while (mask != 0)
4014 {
4015 if (mask & 1)
4016 {
4017 v ^= sBits & 1;
4018 }
4019
4020 sBits >>= 1;
4021 mask >>= 1;
4022 }
4023 }
4024
4025 offset |= (v << i);
4026 }
4027
4028 return offset;
4029 }
4030
4031 /**
4032 ************************************************************************************************************************
4033 * Gfx10Lib::GetSwizzlePatternInfo
4034 *
4035 * @brief
4036 * Get swizzle pattern
4037 *
4038 * @return
4039 * Swizzle pattern information
4040 ************************************************************************************************************************
4041 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4042 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4043 AddrSwizzleMode swizzleMode, ///< Swizzle mode
4044 AddrResourceType resourceType, ///< Resource type
4045 UINT_32 elemLog2, ///< Element size in bytes log2
4046 UINT_32 numFrag ///< Number of fragment
4047 ) const
4048 {
4049 // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from
4050 // the right location
4051 const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4052 const ADDR_SW_PATINFO* patInfo = NULL;
4053 const UINT_32 swizzleMask = 1 << swizzleMode;
4054
4055 if (IsBlockVariable(swizzleMode))
4056 {
4057 if (m_blockVarSizeLog2 != 0)
4058 {
4059 ADDR_ASSERT(m_settings.supportRbPlus);
4060
4061 if (IsRtOptSwizzle(swizzleMode))
4062 {
4063 if (numFrag == 1)
4064 {
4065 patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4066 }
4067 else if (numFrag == 2)
4068 {
4069 patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4070 }
4071 else if (numFrag == 4)
4072 {
4073 patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4074 }
4075 else
4076 {
4077 ADDR_ASSERT(numFrag == 8);
4078 patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4079 }
4080 }
4081 else if (IsZOrderSwizzle(swizzleMode))
4082 {
4083 if (numFrag == 1)
4084 {
4085 patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4086 }
4087 else if (numFrag == 2)
4088 {
4089 patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4090 }
4091 else if (numFrag == 4)
4092 {
4093 patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4094 }
4095 else
4096 {
4097 ADDR_ASSERT(numFrag == 8);
4098 patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4099 }
4100 }
4101 }
4102 }
4103 else if (IsLinear(swizzleMode) == FALSE)
4104 {
4105 if (resourceType == ADDR_RSRC_TEX_3D)
4106 {
4107 ADDR_ASSERT(numFrag == 1);
4108
4109 if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4110 {
4111 if (IsRtOptSwizzle(swizzleMode))
4112 {
4113 if (swizzleMode == ADDR_SW_4KB_R_X)
4114 {
4115 patInfo = NULL;
4116 }
4117 else
4118 {
4119 patInfo = m_settings.supportRbPlus ?
4120 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4121 }
4122 }
4123 else if (IsZOrderSwizzle(swizzleMode))
4124 {
4125 patInfo = m_settings.supportRbPlus ?
4126 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4127 }
4128 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4129 {
4130 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4131 patInfo = m_settings.supportRbPlus ?
4132 GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4133 }
4134 else
4135 {
4136 ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4137
4138 if (IsBlock4kb(swizzleMode))
4139 {
4140 if (swizzleMode == ADDR_SW_4KB_S)
4141 {
4142 patInfo = m_settings.supportRbPlus ?
4143 GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4144 }
4145 else
4146 {
4147 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4148 patInfo = m_settings.supportRbPlus ?
4149 GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4150 }
4151 }
4152 else
4153 {
4154 if (swizzleMode == ADDR_SW_64KB_S)
4155 {
4156 patInfo = m_settings.supportRbPlus ?
4157 GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4158 }
4159 else if (swizzleMode == ADDR_SW_64KB_S_X)
4160 {
4161 patInfo = m_settings.supportRbPlus ?
4162 GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4163 }
4164 else
4165 {
4166 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4167 patInfo = m_settings.supportRbPlus ?
4168 GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4169 }
4170 }
4171 }
4172 }
4173 }
4174 else
4175 {
4176 if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4177 {
4178 if (IsBlock256b(swizzleMode))
4179 {
4180 if (swizzleMode == ADDR_SW_256B_S)
4181 {
4182 patInfo = m_settings.supportRbPlus ?
4183 GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4184 }
4185 else
4186 {
4187 ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4188 patInfo = m_settings.supportRbPlus ?
4189 GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4190 }
4191 }
4192 else if (IsBlock4kb(swizzleMode))
4193 {
4194 if (IsStandardSwizzle(resourceType, swizzleMode))
4195 {
4196 if (swizzleMode == ADDR_SW_4KB_S)
4197 {
4198 patInfo = m_settings.supportRbPlus ?
4199 GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4200 }
4201 else
4202 {
4203 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4204 patInfo = m_settings.supportRbPlus ?
4205 GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4206 }
4207 }
4208 else
4209 {
4210 if (swizzleMode == ADDR_SW_4KB_D)
4211 {
4212 patInfo = m_settings.supportRbPlus ?
4213 GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4214 }
4215 else if (swizzleMode == ADDR_SW_4KB_R_X)
4216 {
4217 patInfo = NULL;
4218 }
4219 else
4220 {
4221 ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4222 patInfo = m_settings.supportRbPlus ?
4223 GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4224 }
4225 }
4226 }
4227 else
4228 {
4229 if (IsRtOptSwizzle(swizzleMode))
4230 {
4231 if (numFrag == 1)
4232 {
4233 patInfo = m_settings.supportRbPlus ?
4234 GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4235 }
4236 else if (numFrag == 2)
4237 {
4238 patInfo = m_settings.supportRbPlus ?
4239 GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4240 }
4241 else if (numFrag == 4)
4242 {
4243 patInfo = m_settings.supportRbPlus ?
4244 GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4245 }
4246 else
4247 {
4248 ADDR_ASSERT(numFrag == 8);
4249 patInfo = m_settings.supportRbPlus ?
4250 GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4251 }
4252 }
4253 else if (IsZOrderSwizzle(swizzleMode))
4254 {
4255 if (numFrag == 1)
4256 {
4257 patInfo = m_settings.supportRbPlus ?
4258 GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4259 }
4260 else if (numFrag == 2)
4261 {
4262 patInfo = m_settings.supportRbPlus ?
4263 GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4264 }
4265 else if (numFrag == 4)
4266 {
4267 patInfo = m_settings.supportRbPlus ?
4268 GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4269 }
4270 else
4271 {
4272 ADDR_ASSERT(numFrag == 8);
4273 patInfo = m_settings.supportRbPlus ?
4274 GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4275 }
4276 }
4277 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4278 {
4279 if (swizzleMode == ADDR_SW_64KB_D)
4280 {
4281 patInfo = m_settings.supportRbPlus ?
4282 GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4283 }
4284 else if (swizzleMode == ADDR_SW_64KB_D_X)
4285 {
4286 patInfo = m_settings.supportRbPlus ?
4287 GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4288 }
4289 else
4290 {
4291 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4292 patInfo = m_settings.supportRbPlus ?
4293 GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4294 }
4295 }
4296 else
4297 {
4298 if (swizzleMode == ADDR_SW_64KB_S)
4299 {
4300 patInfo = m_settings.supportRbPlus ?
4301 GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4302 }
4303 else if (swizzleMode == ADDR_SW_64KB_S_X)
4304 {
4305 patInfo = m_settings.supportRbPlus ?
4306 GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4307 }
4308 else
4309 {
4310 ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4311 patInfo = m_settings.supportRbPlus ?
4312 GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4313 }
4314 }
4315 }
4316 }
4317 }
4318 }
4319
4320 return (patInfo != NULL) ? &patInfo[index] : NULL;
4321 }
4322
4323 /**
4324 ************************************************************************************************************************
4325 * Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4326 *
4327 * @brief
4328 * Internal function to calculate address from coord for micro tiled swizzle surface
4329 *
4330 * @return
4331 * ADDR_E_RETURNCODE
4332 ************************************************************************************************************************
4333 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4334 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4335 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4336 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4337 ) const
4338 {
4339 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4340 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4341 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4342 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4343
4344 localIn.swizzleMode = pIn->swizzleMode;
4345 localIn.flags = pIn->flags;
4346 localIn.resourceType = pIn->resourceType;
4347 localIn.bpp = pIn->bpp;
4348 localIn.width = Max(pIn->unalignedWidth, 1u);
4349 localIn.height = Max(pIn->unalignedHeight, 1u);
4350 localIn.numSlices = Max(pIn->numSlices, 1u);
4351 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4352 localIn.numSamples = Max(pIn->numSamples, 1u);
4353 localIn.numFrags = Max(pIn->numFrags, 1u);
4354 localOut.pMipInfo = mipInfo;
4355
4356 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4357
4358 if (ret == ADDR_OK)
4359 {
4360 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4361 const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4362 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4363 const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
4364
4365 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4366 {
4367 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4368 const UINT_32 yb = pIn->y / localOut.blockHeight;
4369 const UINT_32 xb = pIn->x / localOut.blockWidth;
4370 const UINT_32 blockIndex = yb * pb + xb;
4371 const UINT_32 blockSize = 256;
4372 const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4373 pIn->x << elemLog2,
4374 pIn->y,
4375 0);
4376 pOut->addr = localOut.sliceSize * pIn->slice +
4377 mipInfo[pIn->mipId].macroBlockOffset +
4378 (blockIndex * blockSize) +
4379 blk256Offset;
4380 }
4381 else
4382 {
4383 ret = ADDR_INVALIDPARAMS;
4384 }
4385 }
4386
4387 return ret;
4388 }
4389
4390 /**
4391 ************************************************************************************************************************
4392 * Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4393 *
4394 * @brief
4395 * Internal function to calculate address from coord for macro tiled swizzle surface
4396 *
4397 * @return
4398 * ADDR_E_RETURNCODE
4399 ************************************************************************************************************************
4400 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4401 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4402 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4403 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4404 ) const
4405 {
4406 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4407 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4408 ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4409 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4410
4411 localIn.swizzleMode = pIn->swizzleMode;
4412 localIn.flags = pIn->flags;
4413 localIn.resourceType = pIn->resourceType;
4414 localIn.bpp = pIn->bpp;
4415 localIn.width = Max(pIn->unalignedWidth, 1u);
4416 localIn.height = Max(pIn->unalignedHeight, 1u);
4417 localIn.numSlices = Max(pIn->numSlices, 1u);
4418 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4419 localIn.numSamples = Max(pIn->numSamples, 1u);
4420 localIn.numFrags = Max(pIn->numFrags, 1u);
4421 localOut.pMipInfo = mipInfo;
4422
4423 ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4424
4425 if (ret == ADDR_OK)
4426 {
4427 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4428 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4429 const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4430 const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4431 const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4432 const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4433 (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4434
4435 if (localIn.numFrags > 1)
4436 {
4437 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4438 pIn->resourceType,
4439 elemLog2,
4440 localIn.numFrags);
4441
4442 if (pPatInfo != NULL)
4443 {
4444 const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4445 const UINT_32 yb = pIn->y / localOut.blockHeight;
4446 const UINT_32 xb = pIn->x / localOut.blockWidth;
4447 const UINT_64 blkIdx = yb * pb + xb;
4448
4449 ADDR_BIT_SETTING fullSwizzlePattern[20];
4450 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4451
4452 const UINT_32 blkOffset =
4453 ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4454 blkSizeLog2,
4455 pIn->x,
4456 pIn->y,
4457 pIn->slice,
4458 pIn->sample);
4459
4460 pOut->addr = (localOut.sliceSize * pIn->slice) +
4461 (blkIdx << blkSizeLog2) +
4462 (blkOffset ^ pipeBankXor);
4463 }
4464 else
4465 {
4466 ret = ADDR_INVALIDPARAMS;
4467 }
4468 }
4469 else
4470 {
4471 const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4472 const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4473 const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4474
4475 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4476 {
4477 const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4478 const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4479 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4480 const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4481 const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4482 const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4483 const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4484 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4485 const UINT_32 yb = pIn->y / localOut.blockHeight;
4486 const UINT_32 xb = pIn->x / localOut.blockWidth;
4487 const UINT_64 blkIdx = yb * pb + xb;
4488 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4489 x << elemLog2,
4490 y,
4491 z);
4492 pOut->addr = sliceSize * sliceId +
4493 mipInfo[pIn->mipId].macroBlockOffset +
4494 (blkIdx << blkSizeLog2) +
4495 (blkOffset ^ pipeBankXor);
4496 }
4497 else
4498 {
4499 ret = ADDR_INVALIDPARAMS;
4500 }
4501 }
4502 }
4503
4504 return ret;
4505 }
4506
4507 /**
4508 ************************************************************************************************************************
4509 * Gfx10Lib::HwlComputeMaxBaseAlignments
4510 *
4511 * @brief
4512 * Gets maximum alignments
4513 * @return
4514 * maximum alignments
4515 ************************************************************************************************************************
4516 */
HwlComputeMaxBaseAlignments() const4517 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4518 {
4519 return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4520 }
4521
4522 /**
4523 ************************************************************************************************************************
4524 * Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4525 *
4526 * @brief
4527 * Gets maximum alignments for metadata
4528 * @return
4529 * maximum alignments for metadata
4530 ************************************************************************************************************************
4531 */
HwlComputeMaxMetaBaseAlignments() const4532 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4533 {
4534 Dim3d metaBlk;
4535
4536 const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4537 {
4538 ADDR_SW_64KB_Z_X,
4539 m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4540 };
4541
4542 UINT_32 maxBaseAlignHtile = 0;
4543 UINT_32 maxBaseAlignCmask = 0;
4544
4545 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4546 {
4547 for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4548 {
4549 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4550 {
4551 // Max base alignment for Htile
4552 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4553 ADDR_RSRC_TEX_2D,
4554 ValidSwizzleModeForXmask[swIdx],
4555 bppLog2,
4556 numFragLog2,
4557 TRUE,
4558 &metaBlk);
4559
4560 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4561 }
4562 }
4563
4564 // Max base alignment for Cmask
4565 const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4566 ADDR_RSRC_TEX_2D,
4567 ValidSwizzleModeForXmask[swIdx],
4568 0,
4569 0,
4570 TRUE,
4571 &metaBlk);
4572
4573 maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4574 }
4575
4576 // Max base alignment for 2D Dcc
4577 const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4578 {
4579 ADDR_SW_64KB_S_X,
4580 ADDR_SW_64KB_D_X,
4581 ADDR_SW_64KB_R_X,
4582 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4583 };
4584
4585 UINT_32 maxBaseAlignDcc2D = 0;
4586
4587 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4588 {
4589 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4590 {
4591 for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4592 {
4593 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4594 ADDR_RSRC_TEX_2D,
4595 ValidSwizzleModeForDcc2D[swIdx],
4596 bppLog2,
4597 numFragLog2,
4598 TRUE,
4599 &metaBlk);
4600
4601 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4602 }
4603 }
4604 }
4605
4606 // Max base alignment for 3D Dcc
4607 const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4608 {
4609 ADDR_SW_64KB_Z_X,
4610 ADDR_SW_64KB_S_X,
4611 ADDR_SW_64KB_D_X,
4612 ADDR_SW_64KB_R_X,
4613 m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4614 };
4615
4616 UINT_32 maxBaseAlignDcc3D = 0;
4617
4618 for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4619 {
4620 for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4621 {
4622 const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4623 ADDR_RSRC_TEX_3D,
4624 ValidSwizzleModeForDcc3D[swIdx],
4625 bppLog2,
4626 0,
4627 TRUE,
4628 &metaBlk);
4629
4630 maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4631 }
4632 }
4633
4634 return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4635 }
4636
4637 /**
4638 ************************************************************************************************************************
4639 * Gfx10Lib::GetMetaElementSizeLog2
4640 *
4641 * @brief
4642 * Gets meta data element size log2
4643 * @return
4644 * Meta data element size log2
4645 ************************************************************************************************************************
4646 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4647 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4648 Gfx10DataType dataType) ///< Data surface type
4649 {
4650 INT_32 elemSizeLog2 = 0;
4651
4652 if (dataType == Gfx10DataColor)
4653 {
4654 elemSizeLog2 = 0;
4655 }
4656 else if (dataType == Gfx10DataDepthStencil)
4657 {
4658 elemSizeLog2 = 2;
4659 }
4660 else
4661 {
4662 ADDR_ASSERT(dataType == Gfx10DataFmask);
4663 elemSizeLog2 = -1;
4664 }
4665
4666 return elemSizeLog2;
4667 }
4668
4669 /**
4670 ************************************************************************************************************************
4671 * Gfx10Lib::GetMetaCacheSizeLog2
4672 *
4673 * @brief
4674 * Gets meta data cache line size log2
4675 * @return
4676 * Meta data cache line size log2
4677 ************************************************************************************************************************
4678 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4679 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4680 Gfx10DataType dataType) ///< Data surface type
4681 {
4682 INT_32 cacheSizeLog2 = 0;
4683
4684 if (dataType == Gfx10DataColor)
4685 {
4686 cacheSizeLog2 = 6;
4687 }
4688 else if (dataType == Gfx10DataDepthStencil)
4689 {
4690 cacheSizeLog2 = 8;
4691 }
4692 else
4693 {
4694 ADDR_ASSERT(dataType == Gfx10DataFmask);
4695 cacheSizeLog2 = 8;
4696 }
4697 return cacheSizeLog2;
4698 }
4699
4700 /**
4701 ************************************************************************************************************************
4702 * Gfx10Lib::HwlComputeSurfaceInfoLinear
4703 *
4704 * @brief
4705 * Internal function to calculate alignment for linear surface
4706 *
4707 * @return
4708 * ADDR_E_RETURNCODE
4709 ************************************************************************************************************************
4710 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4711 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4712 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4713 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4714 ) const
4715 {
4716 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4717
4718 if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4719 {
4720 returnCode = ADDR_INVALIDPARAMS;
4721 }
4722 else
4723 {
4724 const UINT_32 elementBytes = pIn->bpp >> 3;
4725 const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4726 const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4727 UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4728 UINT_32 actualHeight = pIn->height;
4729 UINT_64 sliceSize = 0;
4730
4731 if (pIn->numMipLevels > 1)
4732 {
4733 for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4734 {
4735 UINT_32 mipWidth, mipHeight;
4736
4737 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4738
4739 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4740
4741 if (pOut->pMipInfo != NULL)
4742 {
4743 pOut->pMipInfo[i].pitch = mipActualWidth;
4744 pOut->pMipInfo[i].height = mipHeight;
4745 pOut->pMipInfo[i].depth = mipDepth;
4746 pOut->pMipInfo[i].offset = sliceSize;
4747 pOut->pMipInfo[i].mipTailOffset = 0;
4748 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4749 }
4750
4751 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4752 }
4753 }
4754 else
4755 {
4756 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4757
4758 if (returnCode == ADDR_OK)
4759 {
4760 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4761
4762 if (pOut->pMipInfo != NULL)
4763 {
4764 pOut->pMipInfo[0].pitch = pitch;
4765 pOut->pMipInfo[0].height = actualHeight;
4766 pOut->pMipInfo[0].depth = mipDepth;
4767 pOut->pMipInfo[0].offset = 0;
4768 pOut->pMipInfo[0].mipTailOffset = 0;
4769 pOut->pMipInfo[0].macroBlockOffset = 0;
4770 }
4771 }
4772 }
4773
4774 if (returnCode == ADDR_OK)
4775 {
4776 pOut->pitch = pitch;
4777 pOut->height = actualHeight;
4778 pOut->numSlices = pIn->numSlices;
4779 pOut->sliceSize = sliceSize;
4780 pOut->surfSize = sliceSize * pOut->numSlices;
4781 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4782 pOut->blockWidth = pitchAlign;
4783 pOut->blockHeight = 1;
4784 pOut->blockSlices = 1;
4785
4786 // Following members are useless on GFX10
4787 pOut->mipChainPitch = 0;
4788 pOut->mipChainHeight = 0;
4789 pOut->mipChainSlice = 0;
4790 pOut->epitchIsHeight = FALSE;
4791
4792 // Post calculation validate
4793 ADDR_ASSERT(pOut->sliceSize > 0);
4794 }
4795 }
4796
4797 return returnCode;
4798 }
4799
4800 } // V2
4801 } // Addr
4802