1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx9addrlib.cpp
12 * @brief Contains the implementation for the Gfx9Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx9addrlib.h"
17
18 #include "gfx9_gb_reg.h"
19
20 #include "amdgpu_asic_addr.h"
21
22 ////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////
24
25 namespace Addr
26 {
27
28 /**
29 ************************************************************************************************************************
30 * Gfx9HwlInit
31 *
32 * @brief
33 * Creates an Gfx9Lib object.
34 *
35 * @return
36 * Returns an Gfx9Lib object pointer.
37 ************************************************************************************************************************
38 */
Gfx9HwlInit(const Client * pClient)39 Addr::Lib* Gfx9HwlInit(const Client* pClient)
40 {
41 return V2::Gfx9Lib::CreateObj(pClient);
42 }
43
44 namespace V2
45 {
46
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 // Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50
51 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
52 {//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved
53 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR
54 {{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_S
55 {{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_256B_D
56 {{0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_256B_R
57
58 {{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_Z
59 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_S
60 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_4KB_D
61 {{0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_4KB_R
62
63 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_Z
64 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_S
65 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}}, // ADDR_SW_64KB_D
66 {{0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, // ADDR_SW_64KB_R
67
68 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
69 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
70 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
71 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
72
73 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_Z_T
74 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_S_T
75 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}}, // ADDR_SW_64KB_D_T
76 {{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0}}, // ADDR_SW_64KB_R_T
77
78 {{0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_Z_x
79 {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_x
80 {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_x
81 {{0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0}}, // ADDR_SW_4KB_R_x
82
83 {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X
84 {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X
85 {{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_D_X
86 {{0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}}, // ADDR_SW_64KB_R_X
87
88 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
89 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
90 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
91 {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved
92 {{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // ADDR_SW_LINEAR_GENERAL
93 };
94
95 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
96
97 const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
98
99 const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
100
101 /**
102 ************************************************************************************************************************
103 * Gfx9Lib::Gfx9Lib
104 *
105 * @brief
106 * Constructor
107 *
108 ************************************************************************************************************************
109 */
Gfx9Lib(const Client * pClient)110 Gfx9Lib::Gfx9Lib(const Client* pClient)
111 :
112 Lib(pClient)
113 {
114 memset(&m_settings, 0, sizeof(m_settings));
115 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
116 memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
117 m_metaEqOverrideIndex = 0;
118 }
119
120 /**
121 ************************************************************************************************************************
122 * Gfx9Lib::~Gfx9Lib
123 *
124 * @brief
125 * Destructor
126 ************************************************************************************************************************
127 */
~Gfx9Lib()128 Gfx9Lib::~Gfx9Lib()
129 {
130 }
131
132 /**
133 ************************************************************************************************************************
134 * Gfx9Lib::HwlComputeHtileInfo
135 *
136 * @brief
137 * Interface function stub of AddrComputeHtilenfo
138 *
139 * @return
140 * ADDR_E_RETURNCODE
141 ************************************************************************************************************************
142 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const143 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
144 const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure
145 ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure
146 ) const
147 {
148 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
149 pIn->swizzleMode);
150
151 UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
152
153 UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
154
155 if ((numPipeTotal == 1) && (numRbTotal == 1))
156 {
157 numCompressBlkPerMetaBlkLog2 = 10;
158 }
159 else
160 {
161 if (m_settings.applyAliasFix)
162 {
163 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
164 }
165 else
166 {
167 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
168 }
169 }
170
171 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
172
173 Dim3d metaBlkDim = {8, 8, 1};
174 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
175 UINT_32 widthAmp = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
176 UINT_32 heightAmp = totalAmpBits - widthAmp;
177 metaBlkDim.w <<= widthAmp;
178 metaBlkDim.h <<= heightAmp;
179
180 #if DEBUG
181 Dim3d metaBlkDimDbg = {8, 8, 1};
182 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
183 {
184 if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
185 ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
186 {
187 metaBlkDimDbg.h <<= 1;
188 }
189 else
190 {
191 metaBlkDimDbg.w <<= 1;
192 }
193 }
194 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
195 #endif
196
197 UINT_32 numMetaBlkX;
198 UINT_32 numMetaBlkY;
199 UINT_32 numMetaBlkZ;
200
201 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
202 pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
203 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
204
205 const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
206 UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
207
208 if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
209 {
210 align *= (numPipeTotal >> 1);
211 }
212
213 align = Max(align, metaBlkSize);
214
215 if (m_settings.metaBaseAlignFix)
216 {
217 align = Max(align, GetBlockSize(pIn->swizzleMode));
218 }
219
220 if (m_settings.htileAlignFix)
221 {
222 const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2;
223 const INT_32 htileCachelineSizeLog2 = 11;
224 const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
225
226 INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
227
228 align <<= rbMaskPadding;
229 }
230
231 pOut->pitch = numMetaBlkX * metaBlkDim.w;
232 pOut->height = numMetaBlkY * metaBlkDim.h;
233 pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize;
234
235 pOut->metaBlkWidth = metaBlkDim.w;
236 pOut->metaBlkHeight = metaBlkDim.h;
237 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
238
239 pOut->baseAlign = align;
240 pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
241
242 return ADDR_OK;
243 }
244
245 /**
246 ************************************************************************************************************************
247 * Gfx9Lib::HwlComputeCmaskInfo
248 *
249 * @brief
250 * Interface function stub of AddrComputeCmaskInfo
251 *
252 * @return
253 * ADDR_E_RETURNCODE
254 ************************************************************************************************************************
255 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const256 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
257 const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure
258 ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
259 ) const
260 {
261 ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
262
263 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
264 pIn->swizzleMode);
265
266 UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
267
268 UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
269
270 if ((numPipeTotal == 1) && (numRbTotal == 1))
271 {
272 numCompressBlkPerMetaBlkLog2 = 13;
273 }
274 else
275 {
276 if (m_settings.applyAliasFix)
277 {
278 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
279 }
280 else
281 {
282 numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
283 }
284
285 numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
286 }
287
288 numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
289
290 Dim2d metaBlkDim = {8, 8};
291 UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
292 UINT_32 heightAmp = totalAmpBits >> 1;
293 UINT_32 widthAmp = totalAmpBits - heightAmp;
294 metaBlkDim.w <<= widthAmp;
295 metaBlkDim.h <<= heightAmp;
296
297 #if DEBUG
298 Dim2d metaBlkDimDbg = {8, 8};
299 for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
300 {
301 if (metaBlkDimDbg.h < metaBlkDimDbg.w)
302 {
303 metaBlkDimDbg.h <<= 1;
304 }
305 else
306 {
307 metaBlkDimDbg.w <<= 1;
308 }
309 }
310 ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
311 #endif
312
313 UINT_32 numMetaBlkX = (pIn->unalignedWidth + metaBlkDim.w - 1) / metaBlkDim.w;
314 UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
315 UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
316
317 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
318
319 if (m_settings.metaBaseAlignFix)
320 {
321 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
322 }
323
324 pOut->pitch = numMetaBlkX * metaBlkDim.w;
325 pOut->height = numMetaBlkY * metaBlkDim.h;
326 pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
327 pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
328 pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
329
330 pOut->metaBlkWidth = metaBlkDim.w;
331 pOut->metaBlkHeight = metaBlkDim.h;
332
333 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
334
335 // Get the CMASK address equation (copied from CmaskAddrFromCoord)
336 UINT_32 fmaskBpp = GetFmaskBpp(1, 1);
337 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
338 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
339 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
340
341 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
342 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
343 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
344
345 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
346
347 // Generate the CMASK address equation.
348 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
349 bool checked = false;
350 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
351 CoordTerm &bit = (*eq)[b];
352
353 unsigned c;
354 for (c = 0; c < bit.getsize(); c++) {
355 Coordinate &coord = bit[c];
356 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
357 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
358 }
359 for (; c < 5; c++)
360 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
361 }
362
363 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
364 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
365 CoordTerm &prev = (*eq)[b - 1];
366 CoordTerm &cur = (*eq)[b];
367
368 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
369 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
370 prev[0].getord() + 1 == cur[0].getord())
371 pOut->equation.gfx9.num_bits = b;
372 else
373 break;
374 }
375
376 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
377 pIn->swizzleMode);
378
379 return ADDR_OK;
380 }
381
382 /**
383 ************************************************************************************************************************
384 * Gfx9Lib::GetMetaMipInfo
385 *
386 * @brief
387 * Get meta mip info
388 *
389 * @return
390 * N/A
391 ************************************************************************************************************************
392 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const393 VOID Gfx9Lib::GetMetaMipInfo(
394 UINT_32 numMipLevels, ///< [in] number of mip levels
395 Dim3d* pMetaBlkDim, ///< [in] meta block dimension
396 BOOL_32 dataThick, ///< [in] data surface is thick
397 ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info
398 UINT_32 mip0Width, ///< [in] mip0 width
399 UINT_32 mip0Height, ///< [in] mip0 height
400 UINT_32 mip0Depth, ///< [in] mip0 depth
401 UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain
402 UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain
403 UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain
404 const
405 {
406 UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
407 UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
408 UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
409 UINT_32 tailWidth = pMetaBlkDim->w;
410 UINT_32 tailHeight = pMetaBlkDim->h >> 1;
411 UINT_32 tailDepth = pMetaBlkDim->d;
412 BOOL_32 inTail = FALSE;
413 AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
414
415 if (numMipLevels > 1)
416 {
417 if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
418 {
419 // Z major
420 major = ADDR_MAJOR_Z;
421 }
422 else if (numMetaBlkX >= numMetaBlkY)
423 {
424 // X major
425 major = ADDR_MAJOR_X;
426 }
427 else
428 {
429 // Y major
430 major = ADDR_MAJOR_Y;
431 }
432
433 inTail = ((mip0Width <= tailWidth) &&
434 (mip0Height <= tailHeight) &&
435 ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
436
437 if (inTail == FALSE)
438 {
439 UINT_32 orderLimit;
440 UINT_32 *pMipDim;
441 UINT_32 *pOrderDim;
442
443 if (major == ADDR_MAJOR_Z)
444 {
445 // Z major
446 pMipDim = &numMetaBlkY;
447 pOrderDim = &numMetaBlkZ;
448 orderLimit = 4;
449 }
450 else if (major == ADDR_MAJOR_X)
451 {
452 // X major
453 pMipDim = &numMetaBlkY;
454 pOrderDim = &numMetaBlkX;
455 orderLimit = 4;
456 }
457 else
458 {
459 // Y major
460 pMipDim = &numMetaBlkX;
461 pOrderDim = &numMetaBlkY;
462 orderLimit = 2;
463 }
464
465 if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
466 {
467 *pMipDim += 2;
468 }
469 else
470 {
471 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
472 }
473 }
474 }
475
476 if (pInfo != NULL)
477 {
478 UINT_32 mipWidth = mip0Width;
479 UINT_32 mipHeight = mip0Height;
480 UINT_32 mipDepth = mip0Depth;
481 Dim3d mipCoord = {0};
482
483 for (UINT_32 mip = 0; mip < numMipLevels; mip++)
484 {
485 if (inTail)
486 {
487 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
488 pMetaBlkDim);
489 break;
490 }
491 else
492 {
493 mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w);
494 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
495 mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d);
496
497 pInfo[mip].inMiptail = FALSE;
498 pInfo[mip].startX = mipCoord.w;
499 pInfo[mip].startY = mipCoord.h;
500 pInfo[mip].startZ = mipCoord.d;
501 pInfo[mip].width = mipWidth;
502 pInfo[mip].height = mipHeight;
503 pInfo[mip].depth = dataThick ? mipDepth : 1;
504
505 if ((mip >= 3) || (mip & 1))
506 {
507 switch (major)
508 {
509 case ADDR_MAJOR_X:
510 mipCoord.w += mipWidth;
511 break;
512 case ADDR_MAJOR_Y:
513 mipCoord.h += mipHeight;
514 break;
515 case ADDR_MAJOR_Z:
516 mipCoord.d += mipDepth;
517 break;
518 default:
519 break;
520 }
521 }
522 else
523 {
524 switch (major)
525 {
526 case ADDR_MAJOR_X:
527 mipCoord.h += mipHeight;
528 break;
529 case ADDR_MAJOR_Y:
530 mipCoord.w += mipWidth;
531 break;
532 case ADDR_MAJOR_Z:
533 mipCoord.h += mipHeight;
534 break;
535 default:
536 break;
537 }
538 }
539
540 mipWidth = Max(mipWidth >> 1, 1u);
541 mipHeight = Max(mipHeight >> 1, 1u);
542 mipDepth = Max(mipDepth >> 1, 1u);
543
544 inTail = ((mipWidth <= tailWidth) &&
545 (mipHeight <= tailHeight) &&
546 ((dataThick == FALSE) || (mipDepth <= tailDepth)));
547 }
548 }
549 }
550
551 *pNumMetaBlkX = numMetaBlkX;
552 *pNumMetaBlkY = numMetaBlkY;
553 *pNumMetaBlkZ = numMetaBlkZ;
554 }
555
556 /**
557 ************************************************************************************************************************
558 * Gfx9Lib::HwlComputeDccInfo
559 *
560 * @brief
561 * Interface function to compute DCC key info
562 *
563 * @return
564 * ADDR_E_RETURNCODE
565 ************************************************************************************************************************
566 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const567 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
568 const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure
569 ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure
570 ) const
571 {
572 BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
573 BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
574 BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
575
576 if (dataLinear)
577 {
578 metaLinear = TRUE;
579 }
580 else if (metaLinear == TRUE)
581 {
582 pipeAligned = FALSE;
583 }
584
585 UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
586
587 if (metaLinear)
588 {
589 // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
590 ADDR_ASSERT_ALWAYS();
591
592 pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
593 pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
594 }
595 else
596 {
597 BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
598
599 UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
600
601 UINT_32 numFrags = Max(pIn->numFrags, 1u);
602 UINT_32 numSlices = Max(pIn->numSlices, 1u);
603
604 minMetaBlkSize /= numFrags;
605
606 UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
607
608 UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
609
610 if ((numPipeTotal > 1) || (numRbTotal > 1))
611 {
612 const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
613
614 numCompressBlkPerMetaBlk =
615 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
616
617 if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
618 {
619 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
620 }
621 }
622
623 Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
624 Dim3d metaBlkDim = compressBlkDim;
625
626 for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
627 {
628 if ((metaBlkDim.h < metaBlkDim.w) ||
629 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
630 {
631 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
632 {
633 metaBlkDim.h <<= 1;
634 }
635 else
636 {
637 metaBlkDim.d <<= 1;
638 }
639 }
640 else
641 {
642 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
643 {
644 metaBlkDim.w <<= 1;
645 }
646 else
647 {
648 metaBlkDim.d <<= 1;
649 }
650 }
651 }
652
653 UINT_32 numMetaBlkX;
654 UINT_32 numMetaBlkY;
655 UINT_32 numMetaBlkZ;
656
657 GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
658 pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
659 &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
660
661 UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
662
663 if (numFrags > m_maxCompFrag)
664 {
665 sizeAlign *= (numFrags / m_maxCompFrag);
666 }
667
668 if (m_settings.metaBaseAlignFix)
669 {
670 sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
671 }
672
673 pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
674 numCompressBlkPerMetaBlk * numFrags;
675 pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
676 pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
677
678 pOut->pitch = numMetaBlkX * metaBlkDim.w;
679 pOut->height = numMetaBlkY * metaBlkDim.h;
680 pOut->depth = numMetaBlkZ * metaBlkDim.d;
681
682 pOut->compressBlkWidth = compressBlkDim.w;
683 pOut->compressBlkHeight = compressBlkDim.h;
684 pOut->compressBlkDepth = compressBlkDim.d;
685
686 pOut->metaBlkWidth = metaBlkDim.w;
687 pOut->metaBlkHeight = metaBlkDim.h;
688 pOut->metaBlkDepth = metaBlkDim.d;
689 pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
690
691 pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
692 pOut->fastClearSizePerSlice =
693 pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
694
695 // Get the DCC address equation (copied from DccAddrFromCoord)
696 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
697 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
698 UINT_32 metaBlkWidthLog2 = Log2(pOut->metaBlkWidth);
699 UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
700 UINT_32 metaBlkDepthLog2 = Log2(pOut->metaBlkDepth);
701 UINT_32 compBlkWidthLog2 = Log2(pOut->compressBlkWidth);
702 UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
703 UINT_32 compBlkDepthLog2 = Log2(pOut->compressBlkDepth);
704
705 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
706 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
707 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
708 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
709
710 CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
711
712 // Generate the DCC address equation.
713 pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
714 bool checked = false;
715 for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
716 CoordTerm &bit = (*eq)[b];
717
718 unsigned c;
719 for (c = 0; c < bit.getsize(); c++) {
720 Coordinate &coord = bit[c];
721 pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
722 pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
723 }
724 for (; c < 5; c++)
725 pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
726 }
727
728 // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
729 for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
730 CoordTerm &prev = (*eq)[b - 1];
731 CoordTerm &cur = (*eq)[b];
732
733 if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
734 prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
735 prev[0].getord() + 1 == cur[0].getord())
736 pOut->equation.gfx9.num_bits = b;
737 else
738 break;
739 }
740
741 pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
742 pIn->swizzleMode);
743 }
744
745 return ADDR_OK;
746 }
747
748 /**
749 ************************************************************************************************************************
750 * Gfx9Lib::HwlComputeMaxBaseAlignments
751 *
752 * @brief
753 * Gets maximum alignments
754 * @return
755 * maximum alignments
756 ************************************************************************************************************************
757 */
HwlComputeMaxBaseAlignments() const758 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
759 {
760 return Size64K;
761 }
762
763 /**
764 ************************************************************************************************************************
765 * Gfx9Lib::HwlComputeMaxMetaBaseAlignments
766 *
767 * @brief
768 * Gets maximum alignments for metadata
769 * @return
770 * maximum alignments for metadata
771 ************************************************************************************************************************
772 */
HwlComputeMaxMetaBaseAlignments() const773 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
774 {
775 // Max base alignment for Htile
776 const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
777 const UINT_32 maxNumRbTotal = m_se * m_rbPerSe;
778
779 // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
780 // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
781 ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
782 const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
783
784 UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
785
786 if (maxNumPipeTotal > 2)
787 {
788 maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
789 }
790
791 maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
792
793 if (m_settings.metaBaseAlignFix)
794 {
795 maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
796 }
797
798 if (m_settings.htileAlignFix)
799 {
800 maxBaseAlignHtile *= maxNumPipeTotal;
801 }
802
803 // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
804
805 // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
806 UINT_32 maxBaseAlignDcc3D = 65536;
807
808 if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
809 {
810 maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
811 }
812
813 // Max base alignment for Msaa Dcc
814 UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
815
816 if (m_settings.metaBaseAlignFix)
817 {
818 maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
819 }
820
821 return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
822 }
823
824 /**
825 ************************************************************************************************************************
826 * Gfx9Lib::HwlComputeCmaskAddrFromCoord
827 *
828 * @brief
829 * Interface function stub of AddrComputeCmaskAddrFromCoord
830 *
831 * @return
832 * ADDR_E_RETURNCODE
833 ************************************************************************************************************************
834 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)835 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
836 const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
837 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
838 {
839 ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
840 input.size = sizeof(input);
841 input.cMaskFlags = pIn->cMaskFlags;
842 input.colorFlags = pIn->colorFlags;
843 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
844 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
845 input.numSlices = Max(pIn->numSlices, 1u);
846 input.swizzleMode = pIn->swizzleMode;
847 input.resourceType = pIn->resourceType;
848
849 ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
850 output.size = sizeof(output);
851
852 ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
853
854 if (returnCode == ADDR_OK)
855 {
856 UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
857 UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
858 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
859 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
860
861 MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
862 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
863 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
864
865 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
866
867 UINT_32 xb = pIn->x / output.metaBlkWidth;
868 UINT_32 yb = pIn->y / output.metaBlkHeight;
869 UINT_32 zb = pIn->slice;
870
871 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
872 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
873 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
874
875 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
876 UINT_64 address = pMetaEq->solve(coords);
877
878 pOut->addr = address >> 1;
879 pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
880
881
882 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
883 pIn->swizzleMode);
884
885 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
886
887 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
888 }
889
890 return returnCode;
891 }
892
893 /**
894 ************************************************************************************************************************
895 * Gfx9Lib::HwlComputeHtileAddrFromCoord
896 *
897 * @brief
898 * Interface function stub of AddrComputeHtileAddrFromCoord
899 *
900 * @return
901 * ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
905 const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
906 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure
907 {
908 ADDR_E_RETURNCODE returnCode = ADDR_OK;
909
910 if (pIn->numMipLevels > 1)
911 {
912 returnCode = ADDR_NOTIMPLEMENTED;
913 }
914 else
915 {
916 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917 input.size = sizeof(input);
918 input.hTileFlags = pIn->hTileFlags;
919 input.depthFlags = pIn->depthflags;
920 input.swizzleMode = pIn->swizzleMode;
921 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
922 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
923 input.numSlices = Max(pIn->numSlices, 1u);
924 input.numMipLevels = Max(pIn->numMipLevels, 1u);
925
926 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
927 output.size = sizeof(output);
928
929 returnCode = ComputeHtileInfo(&input, &output);
930
931 if (returnCode == ADDR_OK)
932 {
933 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
934 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
935 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
936 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
937
938 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
939 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
940 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
941
942 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
943
944 UINT_32 xb = pIn->x / output.metaBlkWidth;
945 UINT_32 yb = pIn->y / output.metaBlkHeight;
946 UINT_32 zb = pIn->slice;
947
948 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
949 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
950 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
951
952 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
953 UINT_64 address = pMetaEq->solve(coords);
954
955 pOut->addr = address >> 1;
956
957 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
958 pIn->swizzleMode);
959
960 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
961
962 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
963 }
964 }
965
966 return returnCode;
967 }
968
969 /**
970 ************************************************************************************************************************
971 * Gfx9Lib::HwlComputeHtileCoordFromAddr
972 *
973 * @brief
974 * Interface function stub of AddrComputeHtileCoordFromAddr
975 *
976 * @return
977 * ADDR_E_RETURNCODE
978 ************************************************************************************************************************
979 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)980 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
981 const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
982 ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure
983 {
984 ADDR_E_RETURNCODE returnCode = ADDR_OK;
985
986 if (pIn->numMipLevels > 1)
987 {
988 returnCode = ADDR_NOTIMPLEMENTED;
989 }
990 else
991 {
992 ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
993 input.size = sizeof(input);
994 input.hTileFlags = pIn->hTileFlags;
995 input.swizzleMode = pIn->swizzleMode;
996 input.unalignedWidth = Max(pIn->unalignedWidth, 1u);
997 input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
998 input.numSlices = Max(pIn->numSlices, 1u);
999 input.numMipLevels = Max(pIn->numMipLevels, 1u);
1000
1001 ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1002 output.size = sizeof(output);
1003
1004 returnCode = ComputeHtileInfo(&input, &output);
1005
1006 if (returnCode == ADDR_OK)
1007 {
1008 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1009 UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth);
1010 UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1011 UINT_32 numSamplesLog2 = Log2(pIn->numSamples);
1012
1013 MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1014 Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1015 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1016
1017 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1018
1019 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1020 pIn->swizzleMode);
1021
1022 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1023
1024 UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1025
1026 UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth;
1027 UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1028
1029 UINT_32 coords[NUM_DIMS];
1030 pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1031
1032 pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1033 pOut->y = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1034 pOut->x = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1035 }
1036 }
1037
1038 return returnCode;
1039 }
1040
1041 /**
1042 ************************************************************************************************************************
1043 * Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1044 *
1045 * @brief
1046 * Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1047 *
1048 * @return
1049 * ADDR_E_RETURNCODE
1050 ************************************************************************************************************************
1051 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1052 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1053 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1054 {
1055 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1056
1057 if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1058 {
1059 returnCode = ADDR_NOTSUPPORTED;
1060 }
1061 else if ((pIn->pitch == 0) ||
1062 (pIn->height == 0) ||
1063 (pIn->compressBlkWidth == 0) ||
1064 (pIn->compressBlkHeight == 0) ||
1065 (pIn->compressBlkDepth == 0) ||
1066 (pIn->metaBlkWidth == 0) ||
1067 (pIn->metaBlkHeight == 0) ||
1068 (pIn->metaBlkDepth == 0) ||
1069 (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1070 {
1071 returnCode = ADDR_NOTSUPPORTED;
1072 }
1073
1074 return returnCode;
1075 }
1076
1077 /**
1078 ************************************************************************************************************************
1079 * Gfx9Lib::HwlComputeDccAddrFromCoord
1080 *
1081 * @brief
1082 * Interface function stub of AddrComputeDccAddrFromCoord
1083 *
1084 * @return
1085 * N/A
1086 ************************************************************************************************************************
1087 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1088 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1089 const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
1090 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1091 {
1092 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
1093 UINT_32 numSamplesLog2 = Log2(pIn->numFrags);
1094 UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth);
1095 UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1096 UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth);
1097 UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth);
1098 UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1099 UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth);
1100
1101 MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1102 Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1103 metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1104 compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1105
1106 const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1107
1108 UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1109 UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1110 UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1111
1112 UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth;
1113 UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1114 UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1115
1116 UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1117 UINT_64 address = pMetaEq->solve(coords);
1118
1119 pOut->addr = address >> 1;
1120
1121 UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1122 pIn->swizzleMode);
1123
1124 UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1125
1126 pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1127 }
1128
1129 /**
1130 ************************************************************************************************************************
1131 * Gfx9Lib::HwlInitGlobalParams
1132 *
1133 * @brief
1134 * Initializes global parameters
1135 *
1136 * @return
1137 * TRUE if all settings are valid
1138 *
1139 ************************************************************************************************************************
1140 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1141 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1142 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1143 {
1144 BOOL_32 valid = TRUE;
1145
1146 if (m_settings.isArcticIsland)
1147 {
1148 GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1149
1150 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1151
1152 // These values are copied from CModel code
1153 switch (gbAddrConfig.bits.NUM_PIPES)
1154 {
1155 case ADDR_CONFIG_1_PIPE:
1156 m_pipes = 1;
1157 m_pipesLog2 = 0;
1158 break;
1159 case ADDR_CONFIG_2_PIPE:
1160 m_pipes = 2;
1161 m_pipesLog2 = 1;
1162 break;
1163 case ADDR_CONFIG_4_PIPE:
1164 m_pipes = 4;
1165 m_pipesLog2 = 2;
1166 break;
1167 case ADDR_CONFIG_8_PIPE:
1168 m_pipes = 8;
1169 m_pipesLog2 = 3;
1170 break;
1171 case ADDR_CONFIG_16_PIPE:
1172 m_pipes = 16;
1173 m_pipesLog2 = 4;
1174 break;
1175 case ADDR_CONFIG_32_PIPE:
1176 m_pipes = 32;
1177 m_pipesLog2 = 5;
1178 break;
1179 default:
1180 ADDR_ASSERT_ALWAYS();
1181 break;
1182 }
1183
1184 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1185 {
1186 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1187 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1188 m_pipeInterleaveLog2 = 8;
1189 break;
1190 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1191 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1192 m_pipeInterleaveLog2 = 9;
1193 break;
1194 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1195 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1196 m_pipeInterleaveLog2 = 10;
1197 break;
1198 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1199 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1200 m_pipeInterleaveLog2 = 11;
1201 break;
1202 default:
1203 ADDR_ASSERT_ALWAYS();
1204 break;
1205 }
1206
1207 // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1208 // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1209 ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1210
1211 switch (gbAddrConfig.bits.NUM_BANKS)
1212 {
1213 case ADDR_CONFIG_1_BANK:
1214 m_banks = 1;
1215 m_banksLog2 = 0;
1216 break;
1217 case ADDR_CONFIG_2_BANK:
1218 m_banks = 2;
1219 m_banksLog2 = 1;
1220 break;
1221 case ADDR_CONFIG_4_BANK:
1222 m_banks = 4;
1223 m_banksLog2 = 2;
1224 break;
1225 case ADDR_CONFIG_8_BANK:
1226 m_banks = 8;
1227 m_banksLog2 = 3;
1228 break;
1229 case ADDR_CONFIG_16_BANK:
1230 m_banks = 16;
1231 m_banksLog2 = 4;
1232 break;
1233 default:
1234 ADDR_ASSERT_ALWAYS();
1235 break;
1236 }
1237
1238 switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1239 {
1240 case ADDR_CONFIG_1_SHADER_ENGINE:
1241 m_se = 1;
1242 m_seLog2 = 0;
1243 break;
1244 case ADDR_CONFIG_2_SHADER_ENGINE:
1245 m_se = 2;
1246 m_seLog2 = 1;
1247 break;
1248 case ADDR_CONFIG_4_SHADER_ENGINE:
1249 m_se = 4;
1250 m_seLog2 = 2;
1251 break;
1252 case ADDR_CONFIG_8_SHADER_ENGINE:
1253 m_se = 8;
1254 m_seLog2 = 3;
1255 break;
1256 default:
1257 ADDR_ASSERT_ALWAYS();
1258 break;
1259 }
1260
1261 switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1262 {
1263 case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1264 m_rbPerSe = 1;
1265 m_rbPerSeLog2 = 0;
1266 break;
1267 case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1268 m_rbPerSe = 2;
1269 m_rbPerSeLog2 = 1;
1270 break;
1271 case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1272 m_rbPerSe = 4;
1273 m_rbPerSeLog2 = 2;
1274 break;
1275 default:
1276 ADDR_ASSERT_ALWAYS();
1277 break;
1278 }
1279
1280 switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1281 {
1282 case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1283 m_maxCompFrag = 1;
1284 m_maxCompFragLog2 = 0;
1285 break;
1286 case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1287 m_maxCompFrag = 2;
1288 m_maxCompFragLog2 = 1;
1289 break;
1290 case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1291 m_maxCompFrag = 4;
1292 m_maxCompFragLog2 = 2;
1293 break;
1294 case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1295 m_maxCompFrag = 8;
1296 m_maxCompFragLog2 = 3;
1297 break;
1298 default:
1299 ADDR_ASSERT_ALWAYS();
1300 break;
1301 }
1302
1303 if ((m_rbPerSeLog2 == 1) &&
1304 (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1305 ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1306 {
1307 ADDR_ASSERT(m_settings.isVega10 == FALSE);
1308 ADDR_ASSERT(m_settings.isRaven == FALSE);
1309 ADDR_ASSERT(m_settings.isVega20 == FALSE);
1310 if (m_settings.isVega12)
1311 {
1312 m_settings.htileCacheRbConflict = 1;
1313 }
1314 }
1315
1316 // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1317 m_blockVarSizeLog2 = 0;
1318 }
1319 else
1320 {
1321 valid = FALSE;
1322 ADDR_NOT_IMPLEMENTED();
1323 }
1324
1325 if (valid)
1326 {
1327 InitEquationTable();
1328 }
1329
1330 return valid;
1331 }
1332
1333 /**
1334 ************************************************************************************************************************
1335 * Gfx9Lib::HwlConvertChipFamily
1336 *
1337 * @brief
1338 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1339 * @return
1340 * ChipFamily
1341 ************************************************************************************************************************
1342 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1343 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1344 UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h
1345 UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1346 {
1347 ChipFamily family = ADDR_CHIP_FAMILY_AI;
1348
1349 switch (uChipFamily)
1350 {
1351 case FAMILY_AI:
1352 m_settings.isArcticIsland = 1;
1353 m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1354 m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1355 m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1356 m_settings.isDce12 = 1;
1357
1358 if (m_settings.isVega10 == 0)
1359 {
1360 m_settings.htileAlignFix = 1;
1361 m_settings.applyAliasFix = 1;
1362 }
1363
1364 m_settings.metaBaseAlignFix = 1;
1365
1366 m_settings.depthPipeXorDisable = 1;
1367 break;
1368 case FAMILY_RV:
1369 m_settings.isArcticIsland = 1;
1370
1371 if (ASICREV_IS_RAVEN(uChipRevision))
1372 {
1373 m_settings.isRaven = 1;
1374
1375 m_settings.depthPipeXorDisable = 1;
1376 }
1377
1378 if (ASICREV_IS_RAVEN2(uChipRevision))
1379 {
1380 m_settings.isRaven = 1;
1381 }
1382
1383 if (m_settings.isRaven == 0)
1384 {
1385 m_settings.htileAlignFix = 1;
1386 m_settings.applyAliasFix = 1;
1387 }
1388
1389 m_settings.isDcn1 = m_settings.isRaven;
1390
1391 if (ASICREV_IS_RENOIR(uChipRevision))
1392 {
1393 m_settings.isRaven = 1;
1394 m_settings.isDcn2 = 1;
1395 }
1396
1397 m_settings.metaBaseAlignFix = 1;
1398 break;
1399
1400 default:
1401 ADDR_ASSERT(!"No Chip found");
1402 break;
1403 }
1404
1405 return family;
1406 }
1407
1408 /**
1409 ************************************************************************************************************************
1410 * Gfx9Lib::InitRbEquation
1411 *
1412 * @brief
1413 * Init RB equation
1414 * @return
1415 * N/A
1416 ************************************************************************************************************************
1417 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1418 VOID Gfx9Lib::GetRbEquation(
1419 CoordEq* pRbEq, ///< [out] rb equation
1420 UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine
1421 UINT_32 numSeLog2) ///< [in] number of shader engine
1422 const
1423 {
1424 // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1425 UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1426 Coordinate cx(DIM_X, rbRegion);
1427 Coordinate cy(DIM_Y, rbRegion);
1428
1429 UINT_32 start = 0;
1430 UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1431
1432 // Clear the rb equation
1433 pRbEq->resize(0);
1434 pRbEq->resize(numRbTotalLog2);
1435
1436 if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1437 {
1438 // Special case when more than 1 SE, and 2 RB per SE
1439 (*pRbEq)[0].add(cx);
1440 (*pRbEq)[0].add(cy);
1441 cx++;
1442 cy++;
1443
1444 if (m_settings.applyAliasFix == false)
1445 {
1446 (*pRbEq)[0].add(cy);
1447 }
1448
1449 (*pRbEq)[0].add(cy);
1450 start++;
1451 }
1452
1453 UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1454
1455 for (UINT_32 i = 0; i < numBits; i++)
1456 {
1457 UINT_32 idx =
1458 start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1459
1460 if ((i % 2) == 1)
1461 {
1462 (*pRbEq)[idx].add(cx);
1463 cx++;
1464 }
1465 else
1466 {
1467 (*pRbEq)[idx].add(cy);
1468 cy++;
1469 }
1470 }
1471 }
1472
1473 /**
1474 ************************************************************************************************************************
1475 * Gfx9Lib::GetDataEquation
1476 *
1477 * @brief
1478 * Get data equation for fmask and Z
1479 * @return
1480 * N/A
1481 ************************************************************************************************************************
1482 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1483 VOID Gfx9Lib::GetDataEquation(
1484 CoordEq* pDataEq, ///< [out] data surface equation
1485 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1486 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1487 AddrResourceType resourceType, ///< [in] data surface resource type
1488 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1489 UINT_32 numSamplesLog2) ///< [in] data surface sample count
1490 const
1491 {
1492 Coordinate cx(DIM_X, 0);
1493 Coordinate cy(DIM_Y, 0);
1494 Coordinate cz(DIM_Z, 0);
1495 Coordinate cs(DIM_S, 0);
1496
1497 // Clear the equation
1498 pDataEq->resize(0);
1499 pDataEq->resize(27);
1500
1501 if (dataSurfaceType == Gfx9DataColor)
1502 {
1503 if (IsLinear(swizzleMode))
1504 {
1505 Coordinate cm(DIM_M, 0);
1506
1507 pDataEq->resize(49);
1508
1509 for (UINT_32 i = 0; i < 49; i++)
1510 {
1511 (*pDataEq)[i].add(cm);
1512 cm++;
1513 }
1514 }
1515 else if (IsThick(resourceType, swizzleMode))
1516 {
1517 // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1518 UINT_32 i;
1519 if (IsStandardSwizzle(resourceType, swizzleMode))
1520 {
1521 // Standard 3d swizzle
1522 // Fill in bottom x bits
1523 for (i = elementBytesLog2; i < 4; i++)
1524 {
1525 (*pDataEq)[i].add(cx);
1526 cx++;
1527 }
1528 // Fill in 2 bits of y and then z
1529 for (i = 4; i < 6; i++)
1530 {
1531 (*pDataEq)[i].add(cy);
1532 cy++;
1533 }
1534 for (i = 6; i < 8; i++)
1535 {
1536 (*pDataEq)[i].add(cz);
1537 cz++;
1538 }
1539 if (elementBytesLog2 < 2)
1540 {
1541 // fill in z & y bit
1542 (*pDataEq)[8].add(cz);
1543 (*pDataEq)[9].add(cy);
1544 cz++;
1545 cy++;
1546 }
1547 else if (elementBytesLog2 == 2)
1548 {
1549 // fill in y and x bit
1550 (*pDataEq)[8].add(cy);
1551 (*pDataEq)[9].add(cx);
1552 cy++;
1553 cx++;
1554 }
1555 else
1556 {
1557 // fill in 2 x bits
1558 (*pDataEq)[8].add(cx);
1559 cx++;
1560 (*pDataEq)[9].add(cx);
1561 cx++;
1562 }
1563 }
1564 else
1565 {
1566 // Z 3d swizzle
1567 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1568 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1569 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1570 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1571 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1572 {
1573 (*pDataEq)[i].add(cz);
1574 cz++;
1575 }
1576 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1577 {
1578 // add an x and z
1579 (*pDataEq)[6].add(cx);
1580 (*pDataEq)[7].add(cz);
1581 cx++;
1582 cz++;
1583 }
1584 else if (elementBytesLog2 == 2)
1585 {
1586 // add a y and z
1587 (*pDataEq)[6].add(cy);
1588 (*pDataEq)[7].add(cz);
1589 cy++;
1590 cz++;
1591 }
1592 // add y and x
1593 (*pDataEq)[8].add(cy);
1594 (*pDataEq)[9].add(cx);
1595 cy++;
1596 cx++;
1597 }
1598 // Fill in bit 10 and up
1599 pDataEq->mort3d( cz, cy, cx, 10 );
1600 }
1601 else if (IsThin(resourceType, swizzleMode))
1602 {
1603 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1604 // Color 2D
1605 UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1606 UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1607 UINT_32 i;
1608 // Fill in bottom x bits
1609 for (i = elementBytesLog2; i < 4; i++)
1610 {
1611 (*pDataEq)[i].add(cx);
1612 cx++;
1613 }
1614 // Fill in bottom y bits
1615 for (i = 4; i < 4 + microYBits; i++)
1616 {
1617 (*pDataEq)[i].add(cy);
1618 cy++;
1619 }
1620 // Fill in last of the micro_x bits
1621 for (i = 4 + microYBits; i < 8; i++)
1622 {
1623 (*pDataEq)[i].add(cx);
1624 cx++;
1625 }
1626 // Fill in x/y bits below sample split
1627 pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1628 // Fill in sample bits
1629 for (i = 0; i < numSamplesLog2; i++)
1630 {
1631 cs.set(DIM_S, i);
1632 (*pDataEq)[tileSplitStart + i].add(cs);
1633 }
1634 // Fill in x/y bits above sample split
1635 if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1636 {
1637 pDataEq->mort2d(cx, cy, blockSizeLog2);
1638 }
1639 else
1640 {
1641 pDataEq->mort2d(cy, cx, blockSizeLog2);
1642 }
1643 }
1644 else
1645 {
1646 ADDR_ASSERT_ALWAYS();
1647 }
1648 }
1649 else
1650 {
1651 // Fmask or depth
1652 UINT_32 sampleStart = elementBytesLog2;
1653 UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1654 UINT_32 ymajStart = 6 + numSamplesLog2;
1655
1656 for (UINT_32 s = 0; s < numSamplesLog2; s++)
1657 {
1658 cs.set(DIM_S, s);
1659 (*pDataEq)[sampleStart + s].add(cs);
1660 }
1661
1662 // Put in the x-major order pixel bits
1663 pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1664 // Put in the y-major order pixel bits
1665 pDataEq->mort2d(cy, cx, ymajStart);
1666 }
1667 }
1668
1669 /**
1670 ************************************************************************************************************************
1671 * Gfx9Lib::GetPipeEquation
1672 *
1673 * @brief
1674 * Get pipe equation
1675 * @return
1676 * N/A
1677 ************************************************************************************************************************
1678 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1679 VOID Gfx9Lib::GetPipeEquation(
1680 CoordEq* pPipeEq, ///< [out] pipe equation
1681 CoordEq* pDataEq, ///< [in] data equation
1682 UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave
1683 UINT_32 numPipeLog2, ///< [in] number of pipes
1684 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1685 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1686 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1687 AddrResourceType resourceType ///< [in] data surface resource type
1688 ) const
1689 {
1690 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1691 CoordEq dataEq;
1692
1693 pDataEq->copy(dataEq);
1694
1695 if (dataSurfaceType == Gfx9DataColor)
1696 {
1697 INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1698 dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1699 }
1700
1701 dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1702
1703 // This section should only apply to z/stencil, maybe fmask
1704 // If the pipe bit is below the comp block size,
1705 // then keep moving up the address until we find a bit that is above
1706 UINT_32 pipeStart = 0;
1707
1708 if (dataSurfaceType != Gfx9DataColor)
1709 {
1710 Coordinate tileMin(DIM_X, 3);
1711
1712 while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1713 {
1714 pipeStart++;
1715 }
1716
1717 // if pipe is 0, then the first pipe bit is above the comp block size,
1718 // so we don't need to do anything
1719 // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1720 // we will get the same pipe equation
1721 if (pipeStart != 0)
1722 {
1723 for (UINT_32 i = 0; i < numPipeLog2; i++)
1724 {
1725 // Copy the jth bit above pipe interleave to the current pipe equation bit
1726 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1727 }
1728 }
1729 }
1730
1731 if (IsPrt(swizzleMode))
1732 {
1733 // Clear out bits above the block size if prt's are enabled
1734 dataEq.resize(blockSizeLog2);
1735 dataEq.resize(48);
1736 }
1737
1738 if (IsXor(swizzleMode))
1739 {
1740 CoordEq xorMask;
1741
1742 if (IsThick(resourceType, swizzleMode))
1743 {
1744 CoordEq xorMask2;
1745
1746 dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1747
1748 xorMask.resize(numPipeLog2);
1749
1750 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1751 {
1752 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1753 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1754 }
1755 }
1756 else
1757 {
1758 // Xor in the bits above the pipe+gpu bits
1759 dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1760
1761 if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1762 {
1763 Coordinate co;
1764 CoordEq xorMask2;
1765 // if 1xaa and not prt, then xor in the z bits
1766 xorMask2.resize(0);
1767 xorMask2.resize(numPipeLog2);
1768 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1769 {
1770 co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1771 xorMask2[pipeIdx].add(co);
1772 }
1773
1774 pPipeEq->xorin(xorMask2);
1775 }
1776 }
1777
1778 xorMask.reverse();
1779 pPipeEq->xorin(xorMask);
1780 }
1781 }
1782 /**
1783 ************************************************************************************************************************
1784 * Gfx9Lib::GetMetaEquation
1785 *
1786 * @brief
1787 * Get meta equation for cmask/htile/DCC
1788 * @return
1789 * Pointer to a calculated meta equation
1790 ************************************************************************************************************************
1791 */
GetMetaEquation(const MetaEqParams & metaEqParams)1792 const CoordEq* Gfx9Lib::GetMetaEquation(
1793 const MetaEqParams& metaEqParams)
1794 {
1795 UINT_32 cachedMetaEqIndex;
1796
1797 for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1798 {
1799 if (memcmp(&metaEqParams,
1800 &m_cachedMetaEqKey[cachedMetaEqIndex],
1801 static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1802 {
1803 break;
1804 }
1805 }
1806
1807 CoordEq* pMetaEq = NULL;
1808
1809 if (cachedMetaEqIndex < MaxCachedMetaEq)
1810 {
1811 pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1812 }
1813 else
1814 {
1815 m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1816
1817 pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1818
1819 m_metaEqOverrideIndex %= MaxCachedMetaEq;
1820
1821 GenMetaEquation(pMetaEq,
1822 metaEqParams.maxMip,
1823 metaEqParams.elementBytesLog2,
1824 metaEqParams.numSamplesLog2,
1825 metaEqParams.metaFlag,
1826 metaEqParams.dataSurfaceType,
1827 metaEqParams.swizzleMode,
1828 metaEqParams.resourceType,
1829 metaEqParams.metaBlkWidthLog2,
1830 metaEqParams.metaBlkHeightLog2,
1831 metaEqParams.metaBlkDepthLog2,
1832 metaEqParams.compBlkWidthLog2,
1833 metaEqParams.compBlkHeightLog2,
1834 metaEqParams.compBlkDepthLog2);
1835 }
1836
1837 return pMetaEq;
1838 }
1839
1840 /**
1841 ************************************************************************************************************************
1842 * Gfx9Lib::GenMetaEquation
1843 *
1844 * @brief
1845 * Get meta equation for cmask/htile/DCC
1846 * @return
1847 * N/A
1848 ************************************************************************************************************************
1849 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1850 VOID Gfx9Lib::GenMetaEquation(
1851 CoordEq* pMetaEq, ///< [out] meta equation
1852 UINT_32 maxMip, ///< [in] max mip Id
1853 UINT_32 elementBytesLog2, ///< [in] data surface element bytes
1854 UINT_32 numSamplesLog2, ///< [in] data surface sample count
1855 ADDR2_META_FLAGS metaFlag, ///< [in] meta falg
1856 Gfx9DataType dataSurfaceType, ///< [in] data surface type
1857 AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode
1858 AddrResourceType resourceType, ///< [in] data surface resource type
1859 UINT_32 metaBlkWidthLog2, ///< [in] meta block width
1860 UINT_32 metaBlkHeightLog2, ///< [in] meta block height
1861 UINT_32 metaBlkDepthLog2, ///< [in] meta block depth
1862 UINT_32 compBlkWidthLog2, ///< [in] compress block width
1863 UINT_32 compBlkHeightLog2, ///< [in] compress block height
1864 UINT_32 compBlkDepthLog2) ///< [in] compress block depth
1865 const
1866 {
1867 UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1868 UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1869
1870 // Get the correct data address and rb equation
1871 CoordEq dataEq;
1872 GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1873 elementBytesLog2, numSamplesLog2);
1874
1875 // Get pipe and rb equations
1876 CoordEq pipeEquation;
1877 GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1878 numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1879 numPipeTotalLog2 = pipeEquation.getsize();
1880
1881 if (metaFlag.linear)
1882 {
1883 // Linear metadata supporting was removed for GFX9! No one can use this feature.
1884 ADDR_ASSERT_ALWAYS();
1885
1886 ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1887
1888 dataEq.copy(*pMetaEq);
1889
1890 if (IsLinear(swizzleMode))
1891 {
1892 if (metaFlag.pipeAligned)
1893 {
1894 // Remove the pipe bits
1895 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1896 pMetaEq->shift(-shift, pipeInterleaveLog2);
1897 }
1898 // Divide by comp block size, which for linear (which is always color) is 256 B
1899 pMetaEq->shift(-8);
1900
1901 if (metaFlag.pipeAligned)
1902 {
1903 // Put pipe bits back in
1904 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1905
1906 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1907 {
1908 pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1909 }
1910 }
1911 }
1912
1913 pMetaEq->shift(1);
1914 }
1915 else
1916 {
1917 UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1918 UINT_32 compFragLog2 =
1919 ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1920 maxCompFragLog2 : numSamplesLog2;
1921
1922 UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1923
1924 // Make sure the metaaddr is cleared
1925 pMetaEq->resize(0);
1926 pMetaEq->resize(27);
1927
1928 if (IsThick(resourceType, swizzleMode))
1929 {
1930 Coordinate cx(DIM_X, 0);
1931 Coordinate cy(DIM_Y, 0);
1932 Coordinate cz(DIM_Z, 0);
1933
1934 if (maxMip > 0)
1935 {
1936 pMetaEq->mort3d(cy, cx, cz);
1937 }
1938 else
1939 {
1940 pMetaEq->mort3d(cx, cy, cz);
1941 }
1942 }
1943 else
1944 {
1945 Coordinate cx(DIM_X, 0);
1946 Coordinate cy(DIM_Y, 0);
1947 Coordinate cs;
1948
1949 if (maxMip > 0)
1950 {
1951 pMetaEq->mort2d(cy, cx, compFragLog2);
1952 }
1953 else
1954 {
1955 pMetaEq->mort2d(cx, cy, compFragLog2);
1956 }
1957
1958 //------------------------------------------------------------------------------------------------------------------------
1959 // Put the compressible fragments at the lsb
1960 // the uncompressible frags will be at the msb of the micro address
1961 //------------------------------------------------------------------------------------------------------------------------
1962 for (UINT_32 s = 0; s < compFragLog2; s++)
1963 {
1964 cs.set(DIM_S, s);
1965 (*pMetaEq)[s].add(cs);
1966 }
1967 }
1968
1969 // Keep a copy of the pipe equations
1970 CoordEq origPipeEquation;
1971 pipeEquation.copy(origPipeEquation);
1972
1973 Coordinate co;
1974 // filter out everything under the compressed block size
1975 co.set(DIM_X, compBlkWidthLog2);
1976 pMetaEq->Filter('<', co, 0, DIM_X);
1977 co.set(DIM_Y, compBlkHeightLog2);
1978 pMetaEq->Filter('<', co, 0, DIM_Y);
1979 co.set(DIM_Z, compBlkDepthLog2);
1980 pMetaEq->Filter('<', co, 0, DIM_Z);
1981
1982 // For non-color, filter out sample bits
1983 if (dataSurfaceType != Gfx9DataColor)
1984 {
1985 co.set(DIM_X, 0);
1986 pMetaEq->Filter('<', co, 0, DIM_S);
1987 }
1988
1989 // filter out everything above the metablock size
1990 co.set(DIM_X, metaBlkWidthLog2 - 1);
1991 pMetaEq->Filter('>', co, 0, DIM_X);
1992 co.set(DIM_Y, metaBlkHeightLog2 - 1);
1993 pMetaEq->Filter('>', co, 0, DIM_Y);
1994 co.set(DIM_Z, metaBlkDepthLog2 - 1);
1995 pMetaEq->Filter('>', co, 0, DIM_Z);
1996
1997 // filter out everything above the metablock size for the channel bits
1998 co.set(DIM_X, metaBlkWidthLog2 - 1);
1999 pipeEquation.Filter('>', co, 0, DIM_X);
2000 co.set(DIM_Y, metaBlkHeightLog2 - 1);
2001 pipeEquation.Filter('>', co, 0, DIM_Y);
2002 co.set(DIM_Z, metaBlkDepthLog2 - 1);
2003 pipeEquation.Filter('>', co, 0, DIM_Z);
2004
2005 // Make sure we still have the same number of channel bits
2006 if (pipeEquation.getsize() != numPipeTotalLog2)
2007 {
2008 ADDR_ASSERT_ALWAYS();
2009 }
2010
2011 // Loop through all channel and rb bits,
2012 // and make sure these components exist in the metadata address
2013 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2014 {
2015 for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2016 {
2017 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2018 {
2019 ADDR_ASSERT_ALWAYS();
2020 }
2021 }
2022 }
2023
2024 const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0;
2025 const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2026 const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2027 CoordEq origRbEquation;
2028
2029 GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2030
2031 CoordEq rbEquation = origRbEquation;
2032
2033 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2034 {
2035 for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2036 {
2037 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2038 {
2039 ADDR_ASSERT_ALWAYS();
2040 }
2041 }
2042 }
2043
2044 if (m_settings.applyAliasFix)
2045 {
2046 co.set(DIM_Z, -1);
2047 }
2048
2049 // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2050 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2051 {
2052 for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2053 {
2054 BOOL_32 isRbEquationInPipeEquation = FALSE;
2055
2056 if (m_settings.applyAliasFix)
2057 {
2058 CoordTerm filteredPipeEq;
2059 filteredPipeEq = pipeEquation[j];
2060
2061 filteredPipeEq.Filter('>', co, 0, DIM_Z);
2062
2063 isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2064 }
2065 else
2066 {
2067 isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2068 }
2069
2070 if (isRbEquationInPipeEquation)
2071 {
2072 rbEquation[i].Clear();
2073 }
2074 }
2075 }
2076
2077 bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2078
2079 // Loop through each bit of the channel, get the smallest coordinate,
2080 // and remove it from the metaaddr, and rb_equation
2081 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2082 {
2083 pipeEquation[i].getsmallest(co);
2084
2085 UINT_32 old_size = pMetaEq->getsize();
2086 pMetaEq->Filter('=', co);
2087 UINT_32 new_size = pMetaEq->getsize();
2088 if (new_size != old_size-1)
2089 {
2090 ADDR_ASSERT_ALWAYS();
2091 }
2092 pipeEquation.remove(co);
2093 for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2094 {
2095 if (rbEquation[j].remove(co))
2096 {
2097 // if we actually removed something from this bit, then add the remaining
2098 // channel bits, as these can be removed for this bit
2099 for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2100 {
2101 if (pipeEquation[i][k] != co)
2102 {
2103 rbEquation[j].add(pipeEquation[i][k]);
2104 rbAppendedWithPipeBits[j] = true;
2105 }
2106 }
2107 }
2108 }
2109 }
2110
2111 // Loop through the rb bits and see what remain;
2112 // filter out the smallest coordinate if it remains
2113 UINT_32 rbBitsLeft = 0;
2114 for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2115 {
2116 BOOL_32 isRbEqAppended = FALSE;
2117
2118 if (m_settings.applyAliasFix)
2119 {
2120 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2121 }
2122 else
2123 {
2124 isRbEqAppended = (rbEquation[i].getsize() > 0);
2125 }
2126
2127 if (isRbEqAppended)
2128 {
2129 rbBitsLeft++;
2130 rbEquation[i].getsmallest(co);
2131 UINT_32 old_size = pMetaEq->getsize();
2132 pMetaEq->Filter('=', co);
2133 UINT_32 new_size = pMetaEq->getsize();
2134 if (new_size != old_size - 1)
2135 {
2136 // assert warning
2137 }
2138 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2139 {
2140 if (rbEquation[j].remove(co))
2141 {
2142 // if we actually removed something from this bit, then add the remaining
2143 // rb bits, as these can be removed for this bit
2144 for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2145 {
2146 if (rbEquation[i][k] != co)
2147 {
2148 rbEquation[j].add(rbEquation[i][k]);
2149 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2150 }
2151 }
2152 }
2153 }
2154 }
2155 }
2156
2157 // capture the size of the metaaddr
2158 UINT_32 metaSize = pMetaEq->getsize();
2159 // resize to 49 bits...make this a nibble address
2160 pMetaEq->resize(49);
2161 // Concatenate the macro address above the current address
2162 for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2163 {
2164 co.set(DIM_M, j);
2165 (*pMetaEq)[i].add(co);
2166 }
2167
2168 // Multiply by meta element size (in nibbles)
2169 if (dataSurfaceType == Gfx9DataColor)
2170 {
2171 pMetaEq->shift(1);
2172 }
2173 else if (dataSurfaceType == Gfx9DataDepthStencil)
2174 {
2175 pMetaEq->shift(3);
2176 }
2177
2178 //------------------------------------------------------------------------------------------
2179 // Note the pipeInterleaveLog2+1 is because address is a nibble address
2180 // Shift up from pipe interleave number of channel
2181 // and rb bits left, and uncompressed fragments
2182 //------------------------------------------------------------------------------------------
2183
2184 pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2185
2186 // Put in the channel bits
2187 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2188 {
2189 origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2190 }
2191
2192 // Put in remaining rb bits
2193 for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2194 {
2195 BOOL_32 isRbEqAppended = FALSE;
2196
2197 if (m_settings.applyAliasFix)
2198 {
2199 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2200 }
2201 else
2202 {
2203 isRbEqAppended = (rbEquation[i].getsize() > 0);
2204 }
2205
2206 if (isRbEqAppended)
2207 {
2208 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2209 // Mark any rb bit we add in to the rb mask
2210 j++;
2211 }
2212 }
2213
2214 //------------------------------------------------------------------------------------------
2215 // Put in the uncompressed fragment bits
2216 //------------------------------------------------------------------------------------------
2217 for (UINT_32 i = 0; i < uncompFragLog2; i++)
2218 {
2219 co.set(DIM_S, compFragLog2 + i);
2220 (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2221 }
2222 }
2223 }
2224
2225 /**
2226 ************************************************************************************************************************
2227 * Gfx9Lib::IsEquationSupported
2228 *
2229 * @brief
2230 * Check if equation is supported for given swizzle mode and resource type.
2231 *
2232 * @return
2233 * TRUE if supported
2234 ************************************************************************************************************************
2235 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2236 BOOL_32 Gfx9Lib::IsEquationSupported(
2237 AddrResourceType rsrcType,
2238 AddrSwizzleMode swMode,
2239 UINT_32 elementBytesLog2) const
2240 {
2241 BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2242 (IsValidSwMode(swMode) == TRUE) &&
2243 (IsLinear(swMode) == FALSE) &&
2244 (((IsTex2d(rsrcType) == TRUE) &&
2245 ((elementBytesLog2 < 4) ||
2246 ((IsRotateSwizzle(swMode) == FALSE) &&
2247 (IsZOrderSwizzle(swMode) == FALSE)))) ||
2248 ((IsTex3d(rsrcType) == TRUE) &&
2249 (IsRotateSwizzle(swMode) == FALSE) &&
2250 (IsBlock256b(swMode) == FALSE)));
2251
2252 return supported;
2253 }
2254
2255 /**
2256 ************************************************************************************************************************
2257 * Gfx9Lib::InitEquationTable
2258 *
2259 * @brief
2260 * Initialize Equation table.
2261 *
2262 * @return
2263 * N/A
2264 ************************************************************************************************************************
2265 */
InitEquationTable()2266 VOID Gfx9Lib::InitEquationTable()
2267 {
2268 memset(m_equationTable, 0, sizeof(m_equationTable));
2269
2270 // Loop all possible resource type (2D/3D)
2271 for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2272 {
2273 AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2274
2275 // Loop all possible swizzle mode
2276 for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2277 {
2278 AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2279
2280 // Loop all possible bpp
2281 for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2282 {
2283 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2284
2285 // Check if the input is supported
2286 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2287 {
2288 ADDR_EQUATION equation;
2289 ADDR_E_RETURNCODE retCode;
2290
2291 memset(&equation, 0, sizeof(ADDR_EQUATION));
2292
2293 // Generate the equation
2294 if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2295 {
2296 retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2297 }
2298 else if (IsThin(rsrcType, swMode))
2299 {
2300 retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2301 }
2302 else
2303 {
2304 retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2305 }
2306
2307 // Only fill the equation into the table if the return code is ADDR_OK,
2308 // otherwise if the return code is not ADDR_OK, it indicates this is not
2309 // a valid input, we do nothing but just fill invalid equation index
2310 // into the lookup table.
2311 if (retCode == ADDR_OK)
2312 {
2313 equationIndex = m_numEquations;
2314 ADDR_ASSERT(equationIndex < EquationTableSize);
2315
2316 m_equationTable[equationIndex] = equation;
2317
2318 m_numEquations++;
2319 }
2320 else
2321 {
2322 ADDR_ASSERT_ALWAYS();
2323 }
2324 }
2325
2326 // Fill the index into the lookup table, if the combination is not supported
2327 // fill the invalid equation index
2328 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2329 }
2330 }
2331 }
2332 }
2333
2334 /**
2335 ************************************************************************************************************************
2336 * Gfx9Lib::HwlGetEquationIndex
2337 *
2338 * @brief
2339 * Interface function stub of GetEquationIndex
2340 *
2341 * @return
2342 * ADDR_E_RETURNCODE
2343 ************************************************************************************************************************
2344 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2345 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2346 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2347 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut
2348 ) const
2349 {
2350 AddrResourceType rsrcType = pIn->resourceType;
2351 AddrSwizzleMode swMode = pIn->swizzleMode;
2352 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
2353 UINT_32 index = ADDR_INVALID_EQUATION_INDEX;
2354
2355 if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2356 {
2357 UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2358 UINT_32 swModeIdx = static_cast<UINT_32>(swMode);
2359
2360 index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2361 }
2362
2363 if (pOut->pMipInfo != NULL)
2364 {
2365 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2366 {
2367 pOut->pMipInfo[i].equationIndex = index;
2368 }
2369 }
2370
2371 return index;
2372 }
2373
2374 /**
2375 ************************************************************************************************************************
2376 * Gfx9Lib::HwlComputeBlock256Equation
2377 *
2378 * @brief
2379 * Interface function stub of ComputeBlock256Equation
2380 *
2381 * @return
2382 * ADDR_E_RETURNCODE
2383 ************************************************************************************************************************
2384 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2385 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2386 AddrResourceType rsrcType,
2387 AddrSwizzleMode swMode,
2388 UINT_32 elementBytesLog2,
2389 ADDR_EQUATION* pEquation) const
2390 {
2391 ADDR_E_RETURNCODE ret = ADDR_OK;
2392
2393 pEquation->numBits = 8;
2394 pEquation->numBitComponents = 1;
2395
2396 UINT_32 i = 0;
2397 for (; i < elementBytesLog2; i++)
2398 {
2399 InitChannel(1, 0 , i, &pEquation->addr[i]);
2400 }
2401
2402 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2403
2404 const UINT_32 maxBitsUsed = 4;
2405 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2406 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2407
2408 for (i = 0; i < maxBitsUsed; i++)
2409 {
2410 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2411 InitChannel(1, 1, i, &y[i]);
2412 }
2413
2414 if (IsStandardSwizzle(rsrcType, swMode))
2415 {
2416 switch (elementBytesLog2)
2417 {
2418 case 0:
2419 pixelBit[0] = x[0];
2420 pixelBit[1] = x[1];
2421 pixelBit[2] = x[2];
2422 pixelBit[3] = x[3];
2423 pixelBit[4] = y[0];
2424 pixelBit[5] = y[1];
2425 pixelBit[6] = y[2];
2426 pixelBit[7] = y[3];
2427 break;
2428 case 1:
2429 pixelBit[0] = x[0];
2430 pixelBit[1] = x[1];
2431 pixelBit[2] = x[2];
2432 pixelBit[3] = y[0];
2433 pixelBit[4] = y[1];
2434 pixelBit[5] = y[2];
2435 pixelBit[6] = x[3];
2436 break;
2437 case 2:
2438 pixelBit[0] = x[0];
2439 pixelBit[1] = x[1];
2440 pixelBit[2] = y[0];
2441 pixelBit[3] = y[1];
2442 pixelBit[4] = y[2];
2443 pixelBit[5] = x[2];
2444 break;
2445 case 3:
2446 pixelBit[0] = x[0];
2447 pixelBit[1] = y[0];
2448 pixelBit[2] = y[1];
2449 pixelBit[3] = x[1];
2450 pixelBit[4] = x[2];
2451 break;
2452 case 4:
2453 pixelBit[0] = y[0];
2454 pixelBit[1] = y[1];
2455 pixelBit[2] = x[0];
2456 pixelBit[3] = x[1];
2457 break;
2458 default:
2459 ADDR_ASSERT_ALWAYS();
2460 ret = ADDR_INVALIDPARAMS;
2461 break;
2462 }
2463 }
2464 else if (IsDisplaySwizzle(rsrcType, swMode))
2465 {
2466 switch (elementBytesLog2)
2467 {
2468 case 0:
2469 pixelBit[0] = x[0];
2470 pixelBit[1] = x[1];
2471 pixelBit[2] = x[2];
2472 pixelBit[3] = y[1];
2473 pixelBit[4] = y[0];
2474 pixelBit[5] = y[2];
2475 pixelBit[6] = x[3];
2476 pixelBit[7] = y[3];
2477 break;
2478 case 1:
2479 pixelBit[0] = x[0];
2480 pixelBit[1] = x[1];
2481 pixelBit[2] = x[2];
2482 pixelBit[3] = y[0];
2483 pixelBit[4] = y[1];
2484 pixelBit[5] = y[2];
2485 pixelBit[6] = x[3];
2486 break;
2487 case 2:
2488 pixelBit[0] = x[0];
2489 pixelBit[1] = x[1];
2490 pixelBit[2] = y[0];
2491 pixelBit[3] = x[2];
2492 pixelBit[4] = y[1];
2493 pixelBit[5] = y[2];
2494 break;
2495 case 3:
2496 pixelBit[0] = x[0];
2497 pixelBit[1] = y[0];
2498 pixelBit[2] = x[1];
2499 pixelBit[3] = x[2];
2500 pixelBit[4] = y[1];
2501 break;
2502 case 4:
2503 pixelBit[0] = x[0];
2504 pixelBit[1] = y[0];
2505 pixelBit[2] = x[1];
2506 pixelBit[3] = y[1];
2507 break;
2508 default:
2509 ADDR_ASSERT_ALWAYS();
2510 ret = ADDR_INVALIDPARAMS;
2511 break;
2512 }
2513 }
2514 else if (IsRotateSwizzle(swMode))
2515 {
2516 switch (elementBytesLog2)
2517 {
2518 case 0:
2519 pixelBit[0] = y[0];
2520 pixelBit[1] = y[1];
2521 pixelBit[2] = y[2];
2522 pixelBit[3] = x[1];
2523 pixelBit[4] = x[0];
2524 pixelBit[5] = x[2];
2525 pixelBit[6] = x[3];
2526 pixelBit[7] = y[3];
2527 break;
2528 case 1:
2529 pixelBit[0] = y[0];
2530 pixelBit[1] = y[1];
2531 pixelBit[2] = y[2];
2532 pixelBit[3] = x[0];
2533 pixelBit[4] = x[1];
2534 pixelBit[5] = x[2];
2535 pixelBit[6] = x[3];
2536 break;
2537 case 2:
2538 pixelBit[0] = y[0];
2539 pixelBit[1] = y[1];
2540 pixelBit[2] = x[0];
2541 pixelBit[3] = y[2];
2542 pixelBit[4] = x[1];
2543 pixelBit[5] = x[2];
2544 break;
2545 case 3:
2546 pixelBit[0] = y[0];
2547 pixelBit[1] = x[0];
2548 pixelBit[2] = y[1];
2549 pixelBit[3] = x[1];
2550 pixelBit[4] = x[2];
2551 break;
2552 default:
2553 ADDR_ASSERT_ALWAYS();
2554 case 4:
2555 ret = ADDR_INVALIDPARAMS;
2556 break;
2557 }
2558 }
2559 else
2560 {
2561 ADDR_ASSERT_ALWAYS();
2562 ret = ADDR_INVALIDPARAMS;
2563 }
2564
2565 // Post validation
2566 if (ret == ADDR_OK)
2567 {
2568 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2569 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2570 (microBlockDim.w * (1 << elementBytesLog2)));
2571 ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2572 }
2573
2574 return ret;
2575 }
2576
2577 /**
2578 ************************************************************************************************************************
2579 * Gfx9Lib::HwlComputeThinEquation
2580 *
2581 * @brief
2582 * Interface function stub of ComputeThinEquation
2583 *
2584 * @return
2585 * ADDR_E_RETURNCODE
2586 ************************************************************************************************************************
2587 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2588 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2589 AddrResourceType rsrcType,
2590 AddrSwizzleMode swMode,
2591 UINT_32 elementBytesLog2,
2592 ADDR_EQUATION* pEquation) const
2593 {
2594 ADDR_E_RETURNCODE ret = ADDR_OK;
2595
2596 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2597
2598 UINT_32 maxXorBits = blockSizeLog2;
2599 if (IsNonPrtXor(swMode))
2600 {
2601 // For non-prt-xor, maybe need to initialize some more bits for xor
2602 // The highest xor bit used in equation will be max the following 3 items:
2603 // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2604 // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2605 // 3. blockSizeLog2
2606
2607 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2608 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2609 GetPipeXorBits(blockSizeLog2) +
2610 2 * GetBankXorBits(blockSizeLog2));
2611 }
2612
2613 const UINT_32 maxBitsUsed = 14;
2614 ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2615 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2616 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2617
2618 const UINT_32 extraXorBits = 16;
2619 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2620 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2621
2622 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2623 {
2624 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2625 InitChannel(1, 1, i, &y[i]);
2626 }
2627
2628 ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2629
2630 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2631 {
2632 InitChannel(1, 0 , i, &pixelBit[i]);
2633 }
2634
2635 UINT_32 xIdx = 0;
2636 UINT_32 yIdx = 0;
2637 UINT_32 lowBits = 0;
2638
2639 if (IsZOrderSwizzle(swMode))
2640 {
2641 if (elementBytesLog2 <= 3)
2642 {
2643 for (UINT_32 i = elementBytesLog2; i < 6; i++)
2644 {
2645 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2646 }
2647
2648 lowBits = 6;
2649 }
2650 else
2651 {
2652 ret = ADDR_INVALIDPARAMS;
2653 }
2654 }
2655 else
2656 {
2657 ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2658
2659 if (ret == ADDR_OK)
2660 {
2661 Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2662 xIdx = Log2(microBlockDim.w);
2663 yIdx = Log2(microBlockDim.h);
2664 lowBits = 8;
2665 }
2666 }
2667
2668 if (ret == ADDR_OK)
2669 {
2670 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2671 {
2672 pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2673 }
2674
2675 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2676 {
2677 xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2678 }
2679
2680 if (IsXor(swMode))
2681 {
2682 // Fill XOR bits
2683 UINT_32 pipeStart = m_pipeInterleaveLog2;
2684 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2685
2686 UINT_32 bankStart = pipeStart + pipeXorBits;
2687 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2688
2689 for (UINT_32 i = 0; i < pipeXorBits; i++)
2690 {
2691 UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2692 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2693 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2694
2695 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2696 }
2697
2698 for (UINT_32 i = 0; i < bankXorBits; i++)
2699 {
2700 UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2701 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2702 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2703
2704 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2705 }
2706
2707 if (IsPrt(swMode) == FALSE)
2708 {
2709 for (UINT_32 i = 0; i < pipeXorBits; i++)
2710 {
2711 InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2712 }
2713
2714 for (UINT_32 i = 0; i < bankXorBits; i++)
2715 {
2716 InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2717 }
2718 }
2719 }
2720
2721 FillEqBitComponents(pEquation);
2722 pEquation->numBits = blockSizeLog2;
2723 }
2724
2725 return ret;
2726 }
2727
2728 /**
2729 ************************************************************************************************************************
2730 * Gfx9Lib::HwlComputeThickEquation
2731 *
2732 * @brief
2733 * Interface function stub of ComputeThickEquation
2734 *
2735 * @return
2736 * ADDR_E_RETURNCODE
2737 ************************************************************************************************************************
2738 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2739 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2740 AddrResourceType rsrcType,
2741 AddrSwizzleMode swMode,
2742 UINT_32 elementBytesLog2,
2743 ADDR_EQUATION* pEquation) const
2744 {
2745 ADDR_E_RETURNCODE ret = ADDR_OK;
2746
2747 ADDR_ASSERT(IsTex3d(rsrcType));
2748
2749 UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2750
2751 UINT_32 maxXorBits = blockSizeLog2;
2752 if (IsNonPrtXor(swMode))
2753 {
2754 // For non-prt-xor, maybe need to initialize some more bits for xor
2755 // The highest xor bit used in equation will be max the following 3:
2756 // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2757 // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2758 // 3. blockSizeLog2
2759
2760 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2761 maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2762 GetPipeXorBits(blockSizeLog2) +
2763 3 * GetBankXorBits(blockSizeLog2));
2764 }
2765
2766 for (UINT_32 i = 0; i < elementBytesLog2; i++)
2767 {
2768 InitChannel(1, 0 , i, &pEquation->addr[i]);
2769 }
2770
2771 ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2772
2773 const UINT_32 maxBitsUsed = 12;
2774 ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2775 ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2776 ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2777 ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2778
2779 const UINT_32 extraXorBits = 24;
2780 ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2781 ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2782
2783 for (UINT_32 i = 0; i < maxBitsUsed; i++)
2784 {
2785 InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2786 InitChannel(1, 1, i, &y[i]);
2787 InitChannel(1, 2, i, &z[i]);
2788 }
2789
2790 if (IsZOrderSwizzle(swMode))
2791 {
2792 switch (elementBytesLog2)
2793 {
2794 case 0:
2795 pixelBit[0] = x[0];
2796 pixelBit[1] = y[0];
2797 pixelBit[2] = x[1];
2798 pixelBit[3] = y[1];
2799 pixelBit[4] = z[0];
2800 pixelBit[5] = z[1];
2801 pixelBit[6] = x[2];
2802 pixelBit[7] = z[2];
2803 pixelBit[8] = y[2];
2804 pixelBit[9] = x[3];
2805 break;
2806 case 1:
2807 pixelBit[0] = x[0];
2808 pixelBit[1] = y[0];
2809 pixelBit[2] = x[1];
2810 pixelBit[3] = y[1];
2811 pixelBit[4] = z[0];
2812 pixelBit[5] = z[1];
2813 pixelBit[6] = z[2];
2814 pixelBit[7] = y[2];
2815 pixelBit[8] = x[2];
2816 break;
2817 case 2:
2818 pixelBit[0] = x[0];
2819 pixelBit[1] = y[0];
2820 pixelBit[2] = x[1];
2821 pixelBit[3] = z[0];
2822 pixelBit[4] = y[1];
2823 pixelBit[5] = z[1];
2824 pixelBit[6] = y[2];
2825 pixelBit[7] = x[2];
2826 break;
2827 case 3:
2828 pixelBit[0] = x[0];
2829 pixelBit[1] = y[0];
2830 pixelBit[2] = z[0];
2831 pixelBit[3] = x[1];
2832 pixelBit[4] = z[1];
2833 pixelBit[5] = y[1];
2834 pixelBit[6] = x[2];
2835 break;
2836 case 4:
2837 pixelBit[0] = x[0];
2838 pixelBit[1] = y[0];
2839 pixelBit[2] = z[0];
2840 pixelBit[3] = z[1];
2841 pixelBit[4] = y[1];
2842 pixelBit[5] = x[1];
2843 break;
2844 default:
2845 ADDR_ASSERT_ALWAYS();
2846 ret = ADDR_INVALIDPARAMS;
2847 break;
2848 }
2849 }
2850 else if (IsStandardSwizzle(rsrcType, swMode))
2851 {
2852 switch (elementBytesLog2)
2853 {
2854 case 0:
2855 pixelBit[0] = x[0];
2856 pixelBit[1] = x[1];
2857 pixelBit[2] = x[2];
2858 pixelBit[3] = x[3];
2859 pixelBit[4] = y[0];
2860 pixelBit[5] = y[1];
2861 pixelBit[6] = z[0];
2862 pixelBit[7] = z[1];
2863 pixelBit[8] = z[2];
2864 pixelBit[9] = y[2];
2865 break;
2866 case 1:
2867 pixelBit[0] = x[0];
2868 pixelBit[1] = x[1];
2869 pixelBit[2] = x[2];
2870 pixelBit[3] = y[0];
2871 pixelBit[4] = y[1];
2872 pixelBit[5] = z[0];
2873 pixelBit[6] = z[1];
2874 pixelBit[7] = z[2];
2875 pixelBit[8] = y[2];
2876 break;
2877 case 2:
2878 pixelBit[0] = x[0];
2879 pixelBit[1] = x[1];
2880 pixelBit[2] = y[0];
2881 pixelBit[3] = y[1];
2882 pixelBit[4] = z[0];
2883 pixelBit[5] = z[1];
2884 pixelBit[6] = y[2];
2885 pixelBit[7] = x[2];
2886 break;
2887 case 3:
2888 pixelBit[0] = x[0];
2889 pixelBit[1] = y[0];
2890 pixelBit[2] = y[1];
2891 pixelBit[3] = z[0];
2892 pixelBit[4] = z[1];
2893 pixelBit[5] = x[1];
2894 pixelBit[6] = x[2];
2895 break;
2896 case 4:
2897 pixelBit[0] = y[0];
2898 pixelBit[1] = y[1];
2899 pixelBit[2] = z[0];
2900 pixelBit[3] = z[1];
2901 pixelBit[4] = x[0];
2902 pixelBit[5] = x[1];
2903 break;
2904 default:
2905 ADDR_ASSERT_ALWAYS();
2906 ret = ADDR_INVALIDPARAMS;
2907 break;
2908 }
2909 }
2910 else
2911 {
2912 ADDR_ASSERT_ALWAYS();
2913 ret = ADDR_INVALIDPARAMS;
2914 }
2915
2916 if (ret == ADDR_OK)
2917 {
2918 Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2919 UINT_32 xIdx = Log2(microBlockDim.w);
2920 UINT_32 yIdx = Log2(microBlockDim.h);
2921 UINT_32 zIdx = Log2(microBlockDim.d);
2922
2923 pixelBit = pEquation->addr;
2924
2925 const UINT_32 lowBits = 10;
2926 ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2927 ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2928
2929 for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2930 {
2931 if ((i % 3) == 0)
2932 {
2933 pixelBit[i] = x[xIdx++];
2934 }
2935 else if ((i % 3) == 1)
2936 {
2937 pixelBit[i] = z[zIdx++];
2938 }
2939 else
2940 {
2941 pixelBit[i] = y[yIdx++];
2942 }
2943 }
2944
2945 for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2946 {
2947 if ((i % 3) == 0)
2948 {
2949 xorExtra[i - blockSizeLog2] = x[xIdx++];
2950 }
2951 else if ((i % 3) == 1)
2952 {
2953 xorExtra[i - blockSizeLog2] = z[zIdx++];
2954 }
2955 else
2956 {
2957 xorExtra[i - blockSizeLog2] = y[yIdx++];
2958 }
2959 }
2960
2961 if (IsXor(swMode))
2962 {
2963 // Fill XOR bits
2964 UINT_32 pipeStart = m_pipeInterleaveLog2;
2965 UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2966 for (UINT_32 i = 0; i < pipeXorBits; i++)
2967 {
2968 UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2969 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2970 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2971
2972 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2973
2974 UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2975 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2976 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2977
2978 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2979 }
2980
2981 UINT_32 bankStart = pipeStart + pipeXorBits;
2982 UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2983 for (UINT_32 i = 0; i < bankXorBits; i++)
2984 {
2985 UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2986 ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ?
2987 &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2988
2989 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2990
2991 UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2992 ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ?
2993 &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2994
2995 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2996 }
2997 }
2998
2999 FillEqBitComponents(pEquation);
3000 pEquation->numBits = blockSizeLog2;
3001 }
3002
3003 return ret;
3004 }
3005
3006 /**
3007 ************************************************************************************************************************
3008 * Gfx9Lib::IsValidDisplaySwizzleMode
3009 *
3010 * @brief
3011 * Check if a swizzle mode is supported by display engine
3012 *
3013 * @return
3014 * TRUE is swizzle mode is supported by display engine
3015 ************************************************************************************************************************
3016 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3017 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3018 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3019 {
3020 BOOL_32 support = FALSE;
3021
3022 const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3023
3024 if (m_settings.isDce12)
3025 {
3026 if (pIn->bpp == 32)
3027 {
3028 support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3029 }
3030 else if (pIn->bpp <= 64)
3031 {
3032 support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3033 }
3034 }
3035 else if (m_settings.isDcn1)
3036 {
3037 if (pIn->bpp < 64)
3038 {
3039 support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3040 }
3041 else if (pIn->bpp == 64)
3042 {
3043 support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3044 }
3045 }
3046 else if (m_settings.isDcn2)
3047 {
3048 if (pIn->bpp < 64)
3049 {
3050 support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3051 }
3052 else if (pIn->bpp == 64)
3053 {
3054 support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3055 }
3056 }
3057 else
3058 {
3059 ADDR_NOT_IMPLEMENTED();
3060 }
3061
3062 return support;
3063 }
3064
3065 /**
3066 ************************************************************************************************************************
3067 * Gfx9Lib::HwlComputePipeBankXor
3068 *
3069 * @brief
3070 * Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3071 *
3072 * @return
3073 * PipeBankXor value
3074 ************************************************************************************************************************
3075 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3076 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3077 const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3078 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
3079 {
3080 if (IsXor(pIn->swizzleMode))
3081 {
3082 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3083 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3084 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3085
3086 UINT_32 pipeXor = 0;
3087 UINT_32 bankXor = 0;
3088
3089 const UINT_32 bankMask = (1 << bankBits) - 1;
3090 const UINT_32 index = pIn->surfIndex & bankMask;
3091
3092 const UINT_32 bpp = pIn->flags.fmask ?
3093 GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3094 if (bankBits == 4)
3095 {
3096 static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3097 static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3098
3099 bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3100 }
3101 else if (bankBits > 0)
3102 {
3103 UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3104 bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3105 bankXor = (index * bankIncrease) & bankMask;
3106 }
3107
3108 pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3109 }
3110 else
3111 {
3112 pOut->pipeBankXor = 0;
3113 }
3114
3115 return ADDR_OK;
3116 }
3117
3118 /**
3119 ************************************************************************************************************************
3120 * Gfx9Lib::HwlComputeSlicePipeBankXor
3121 *
3122 * @brief
3123 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3124 *
3125 * @return
3126 * PipeBankXor value
3127 ************************************************************************************************************************
3128 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3129 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3130 const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3131 ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
3132 {
3133 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3134 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3135 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3136
3137 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3138 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3139
3140 pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3141
3142 return ADDR_OK;
3143 }
3144
3145 /**
3146 ************************************************************************************************************************
3147 * Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3148 *
3149 * @brief
3150 * Compute sub resource offset to support swizzle pattern
3151 *
3152 * @return
3153 * Offset
3154 ************************************************************************************************************************
3155 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3156 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3157 const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3158 ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
3159 {
3160 ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3161
3162 UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3163 UINT_32 pipeBits = GetPipeXorBits(macroBlockBits);
3164 UINT_32 bankBits = GetBankXorBits(macroBlockBits);
3165 UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);
3166 UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3167 UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3168
3169 pOut->offset = pIn->slice * pIn->sliceSize +
3170 pIn->macroBlockOffset +
3171 (pIn->mipTailOffset ^ pipeBankXor) -
3172 static_cast<UINT_64>(pipeBankXor);
3173 return ADDR_OK;
3174 }
3175
3176 /**
3177 ************************************************************************************************************************
3178 * Gfx9Lib::ValidateNonSwModeParams
3179 *
3180 * @brief
3181 * Validate compute surface info params except swizzle mode
3182 *
3183 * @return
3184 * TRUE if parameters are valid, FALSE otherwise
3185 ************************************************************************************************************************
3186 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3187 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3188 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3189 {
3190 BOOL_32 valid = TRUE;
3191
3192 if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3193 {
3194 ADDR_ASSERT_ALWAYS();
3195 valid = FALSE;
3196 }
3197
3198 if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3199 {
3200 ADDR_ASSERT_ALWAYS();
3201 valid = FALSE;
3202 }
3203
3204 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3205 const BOOL_32 msaa = (pIn->numFrags > 1);
3206 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3207
3208 const AddrResourceType rsrcType = pIn->resourceType;
3209 const BOOL_32 tex3d = IsTex3d(rsrcType);
3210 const BOOL_32 tex2d = IsTex2d(rsrcType);
3211 const BOOL_32 tex1d = IsTex1d(rsrcType);
3212
3213 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3214 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3215 const BOOL_32 display = flags.display || flags.rotated;
3216 const BOOL_32 stereo = flags.qbStereo;
3217 const BOOL_32 fmask = flags.fmask;
3218
3219 // Resource type check
3220 if (tex1d)
3221 {
3222 if (msaa || zbuffer || display || stereo || isBc || fmask)
3223 {
3224 ADDR_ASSERT_ALWAYS();
3225 valid = FALSE;
3226 }
3227 }
3228 else if (tex2d)
3229 {
3230 if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3231 {
3232 ADDR_ASSERT_ALWAYS();
3233 valid = FALSE;
3234 }
3235 }
3236 else if (tex3d)
3237 {
3238 if (msaa || zbuffer || display || stereo || fmask)
3239 {
3240 ADDR_ASSERT_ALWAYS();
3241 valid = FALSE;
3242 }
3243 }
3244 else
3245 {
3246 ADDR_ASSERT_ALWAYS();
3247 valid = FALSE;
3248 }
3249
3250 return valid;
3251 }
3252
3253 /**
3254 ************************************************************************************************************************
3255 * Gfx9Lib::ValidateSwModeParams
3256 *
3257 * @brief
3258 * Validate compute surface info related to swizzle mode
3259 *
3260 * @return
3261 * TRUE if parameters are valid, FALSE otherwise
3262 ************************************************************************************************************************
3263 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3264 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3265 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3266 {
3267 BOOL_32 valid = TRUE;
3268
3269 if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3270 {
3271 ADDR_ASSERT_ALWAYS();
3272 valid = FALSE;
3273 }
3274
3275 const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3276 const BOOL_32 msaa = (pIn->numFrags > 1);
3277 const BOOL_32 isBc = ElemLib::IsBlockCompressed(pIn->format);
3278 const BOOL_32 is422 = ElemLib::IsMacroPixelPacked(pIn->format);
3279
3280 const AddrResourceType rsrcType = pIn->resourceType;
3281 const BOOL_32 tex3d = IsTex3d(rsrcType);
3282 const BOOL_32 tex2d = IsTex2d(rsrcType);
3283 const BOOL_32 tex1d = IsTex1d(rsrcType);
3284
3285 const AddrSwizzleMode swizzle = pIn->swizzleMode;
3286 const BOOL_32 linear = IsLinear(swizzle);
3287 const BOOL_32 blk256B = IsBlock256b(swizzle);
3288 const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);
3289
3290 const ADDR2_SURFACE_FLAGS flags = pIn->flags;
3291 const BOOL_32 zbuffer = flags.depth || flags.stencil;
3292 const BOOL_32 color = flags.color;
3293 const BOOL_32 texture = flags.texture;
3294 const BOOL_32 display = flags.display || flags.rotated;
3295 const BOOL_32 prt = flags.prt;
3296 const BOOL_32 fmask = flags.fmask;
3297
3298 const BOOL_32 thin3d = tex3d && flags.view3dAs2dArray;
3299 const BOOL_32 zMaxMip = tex3d && mipmap &&
3300 (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3301
3302 // Misc check
3303 if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3304 {
3305 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3306 ADDR_ASSERT_ALWAYS();
3307 valid = FALSE;
3308 }
3309
3310 if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3311 {
3312 ADDR_ASSERT_ALWAYS();
3313 valid = FALSE;
3314 }
3315
3316 if ((pIn->bpp == 96) && (linear == FALSE))
3317 {
3318 ADDR_ASSERT_ALWAYS();
3319 valid = FALSE;
3320 }
3321
3322 if (prt && isNonPrtXor)
3323 {
3324 ADDR_ASSERT_ALWAYS();
3325 valid = FALSE;
3326 }
3327
3328 // Resource type check
3329 if (tex1d)
3330 {
3331 if (linear == FALSE)
3332 {
3333 ADDR_ASSERT_ALWAYS();
3334 valid = FALSE;
3335 }
3336 }
3337
3338 // Swizzle type check
3339 if (linear)
3340 {
3341 if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3342 ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3343 {
3344 ADDR_ASSERT_ALWAYS();
3345 valid = FALSE;
3346 }
3347 }
3348 else if (IsZOrderSwizzle(swizzle))
3349 {
3350 if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3351 {
3352 ADDR_ASSERT_ALWAYS();
3353 valid = FALSE;
3354 }
3355 }
3356 else if (IsStandardSwizzle(swizzle))
3357 {
3358 if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3359 {
3360 ADDR_ASSERT_ALWAYS();
3361 valid = FALSE;
3362 }
3363 }
3364 else if (IsDisplaySwizzle(swizzle))
3365 {
3366 if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3367 {
3368 ADDR_ASSERT_ALWAYS();
3369 valid = FALSE;
3370 }
3371 }
3372 else if (IsRotateSwizzle(swizzle))
3373 {
3374 if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3375 {
3376 ADDR_ASSERT_ALWAYS();
3377 valid = FALSE;
3378 }
3379 }
3380 else
3381 {
3382 ADDR_ASSERT_ALWAYS();
3383 valid = FALSE;
3384 }
3385
3386 // Block type check
3387 if (blk256B)
3388 {
3389 if (prt || zbuffer || tex3d || mipmap || msaa)
3390 {
3391 ADDR_ASSERT_ALWAYS();
3392 valid = FALSE;
3393 }
3394 }
3395
3396 return valid;
3397 }
3398
3399 /**
3400 ************************************************************************************************************************
3401 * Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3402 *
3403 * @brief
3404 * Compute surface info sanity check
3405 *
3406 * @return
3407 * ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3408 ************************************************************************************************************************
3409 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3410 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3411 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3412 {
3413 return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3414 }
3415
3416 /**
3417 ************************************************************************************************************************
3418 * Gfx9Lib::HwlGetPreferredSurfaceSetting
3419 *
3420 * @brief
3421 * Internal function to get suggested surface information for cliet to use
3422 *
3423 * @return
3424 * ADDR_E_RETURNCODE
3425 ************************************************************************************************************************
3426 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3427 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3428 const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3429 ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
3430 {
3431 ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3432 ElemLib* pElemLib = GetElemLib();
3433
3434 UINT_32 bpp = pIn->bpp;
3435 UINT_32 width = Max(pIn->width, 1u);
3436 UINT_32 height = Max(pIn->height, 1u);
3437 UINT_32 numSamples = Max(pIn->numSamples, 1u);
3438 UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3439
3440 if (pIn->flags.fmask)
3441 {
3442 bpp = GetFmaskBpp(numSamples, numFrags);
3443 numFrags = 1;
3444 numSamples = 1;
3445 pOut->resourceType = ADDR_RSRC_TEX_2D;
3446 }
3447 else
3448 {
3449 // Set format to INVALID will skip this conversion
3450 if (pIn->format != ADDR_FMT_INVALID)
3451 {
3452 UINT_32 expandX, expandY;
3453
3454 // Don't care for this case
3455 ElemMode elemMode = ADDR_UNCOMPRESSED;
3456
3457 // Get compression/expansion factors and element mode which indicates compression/expansion
3458 bpp = pElemLib->GetBitsPerPixel(pIn->format,
3459 &elemMode,
3460 &expandX,
3461 &expandY);
3462
3463 UINT_32 basePitch = 0;
3464 GetElemLib()->AdjustSurfaceInfo(elemMode,
3465 expandX,
3466 expandY,
3467 &bpp,
3468 &basePitch,
3469 &width,
3470 &height);
3471 }
3472
3473 // The output may get changed for volume(3D) texture resource in future
3474 pOut->resourceType = pIn->resourceType;
3475 }
3476
3477 const UINT_32 numSlices = Max(pIn->numSlices, 1u);
3478 const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3479 const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
3480 const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated;
3481
3482 // Pre sanity check on non swizzle mode parameters
3483 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3484 localIn.flags = pIn->flags;
3485 localIn.resourceType = pOut->resourceType;
3486 localIn.format = pIn->format;
3487 localIn.bpp = bpp;
3488 localIn.width = width;
3489 localIn.height = height;
3490 localIn.numSlices = numSlices;
3491 localIn.numMipLevels = numMipLevels;
3492 localIn.numSamples = numSamples;
3493 localIn.numFrags = numFrags;
3494
3495 if (ValidateNonSwModeParams(&localIn))
3496 {
3497 // Forbid swizzle mode(s) by client setting
3498 ADDR2_SWMODE_SET allowedSwModeSet = {};
3499 allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3500 allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask;
3501 allowedSwModeSet.value |=
3502 pIn->forbiddenBlock.macroThin4KB ? 0 :
3503 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3504 allowedSwModeSet.value |=
3505 pIn->forbiddenBlock.macroThick4KB ? 0 :
3506 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3507 allowedSwModeSet.value |=
3508 pIn->forbiddenBlock.macroThin64KB ? 0 :
3509 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3510 allowedSwModeSet.value |=
3511 pIn->forbiddenBlock.macroThick64KB ? 0 :
3512 ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3513
3514 if (pIn->preferredSwSet.value != 0)
3515 {
3516 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3517 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3518 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3519 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3520 }
3521
3522 if (pIn->noXor)
3523 {
3524 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3525 }
3526
3527 if (pIn->maxAlign > 0)
3528 {
3529 if (pIn->maxAlign < Size64K)
3530 {
3531 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3532 }
3533
3534 if (pIn->maxAlign < Size4K)
3535 {
3536 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3537 }
3538
3539 if (pIn->maxAlign < Size256)
3540 {
3541 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3542 }
3543 }
3544
3545 // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3546 switch (pOut->resourceType)
3547 {
3548 case ADDR_RSRC_TEX_1D:
3549 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3550 break;
3551
3552 case ADDR_RSRC_TEX_2D:
3553 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3554
3555 if (bpp > 64)
3556 {
3557 allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3558 }
3559 break;
3560
3561 case ADDR_RSRC_TEX_3D:
3562 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3563
3564 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3565 {
3566 // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3567 // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3568 // or SW_*_Z modes if mipmapping is desired on a 3D surface
3569 allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3570 }
3571
3572 if ((bpp == 128) && pIn->flags.color)
3573 {
3574 allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3575 }
3576
3577 if (pIn->flags.view3dAs2dArray)
3578 {
3579 allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3580 }
3581 break;
3582
3583 default:
3584 ADDR_ASSERT_ALWAYS();
3585 allowedSwModeSet.value = 0;
3586 break;
3587 }
3588
3589 if (pIn->format == ADDR_FMT_32_32_32)
3590 {
3591 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3592 }
3593
3594 if (ElemLib::IsBlockCompressed(pIn->format))
3595 {
3596 if (pIn->flags.texture)
3597 {
3598 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3599 }
3600 else
3601 {
3602 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3603 }
3604 }
3605
3606 if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3607 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3608 {
3609 allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3610 }
3611
3612 if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3613 {
3614 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3615
3616 if (pIn->flags.noMetadata == FALSE)
3617 {
3618 if (pIn->flags.depth &&
3619 pIn->flags.texture &&
3620 (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3621 {
3622 // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3623 // equation from wrong address within memory range a tile covered and use the
3624 // garbage data for compressed Z reading which finally leads to corruption.
3625 allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3626 }
3627
3628 if (m_settings.htileCacheRbConflict &&
3629 (pIn->flags.depth || pIn->flags.stencil) &&
3630 (numSlices > 1) &&
3631 (pIn->flags.metaRbUnaligned == FALSE) &&
3632 (pIn->flags.metaPipeUnaligned == FALSE))
3633 {
3634 // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3635 allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3636 }
3637 }
3638 }
3639
3640 if (msaa)
3641 {
3642 allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3643 }
3644
3645 if ((numFrags > 1) &&
3646 (Size4K < (m_pipeInterleaveBytes * numFrags)))
3647 {
3648 // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3649 allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3650 }
3651
3652 if (numMipLevels > 1)
3653 {
3654 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3655 }
3656
3657 if (displayRsrc)
3658 {
3659 if (m_settings.isDce12)
3660 {
3661 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3662 }
3663 else if (m_settings.isDcn1)
3664 {
3665 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3666 }
3667 else if (m_settings.isDcn2)
3668 {
3669 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3670 }
3671 else
3672 {
3673 ADDR_NOT_IMPLEMENTED();
3674 }
3675 }
3676
3677 if (allowedSwModeSet.value != 0)
3678 {
3679 #if DEBUG
3680 // Post sanity check, at least AddrLib should accept the output generated by its own
3681 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3682
3683 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3684 {
3685 if (validateSwModeSet & 1)
3686 {
3687 localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3688 ADDR_ASSERT(ValidateSwModeParams(&localIn));
3689 }
3690
3691 validateSwModeSet >>= 1;
3692 }
3693 #endif
3694
3695 pOut->validSwModeSet = allowedSwModeSet;
3696 pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3697 pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3698 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3699
3700 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3701
3702 if (pOut->clientPreferredSwSet.value == 0)
3703 {
3704 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3705 }
3706
3707 // Apply optional restrictions
3708 if (pIn->flags.needEquation)
3709 {
3710 UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP :
3711 ADDR_MAX_LEGACY_EQUATION_COMP;
3712 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3713 }
3714
3715 if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3716 {
3717 pOut->swizzleMode = ADDR_SW_LINEAR;
3718 }
3719 else
3720 {
3721 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3722
3723 if ((height > 1) && (computeMinSize == FALSE))
3724 {
3725 // Always ignore linear swizzle mode if:
3726 // 1. This is a (2D/3D) resource with height > 1
3727 // 2. Client doesn't require computing minimize size
3728 allowedSwModeSet.swLinear = 0;
3729 }
3730
3731 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3732
3733 // Determine block size if there are 2 or more block type candidates
3734 if (IsPow2(allowedBlockSet.value) == FALSE)
3735 {
3736 AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3737
3738 swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3739 swMode[AddrBlockMicro] = ADDR_SW_256B_D;
3740 swMode[AddrBlockThin4KB] = ADDR_SW_4KB_D;
3741 swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3742
3743 if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3744 {
3745 swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3746 swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3747 }
3748
3749 UINT_64 padSize[AddrBlockMaxTiledType] = {};
3750
3751 const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3752 const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3753 const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3754 UINT_32 minSizeBlk = AddrBlockMicro;
3755 UINT_64 minSize = 0;
3756
3757 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3758
3759 for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3760 {
3761 if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3762 {
3763 localIn.swizzleMode = swMode[i];
3764
3765 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3766 {
3767 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3768 }
3769 else
3770 {
3771 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3772 }
3773
3774 if (returnCode == ADDR_OK)
3775 {
3776 padSize[i] = localOut.surfSize;
3777
3778 if ((minSize == 0) ||
3779 Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3780 {
3781 minSize = padSize[i];
3782 minSizeBlk = i;
3783 }
3784 }
3785 else
3786 {
3787 ADDR_ASSERT_ALWAYS();
3788 break;
3789 }
3790 }
3791 }
3792
3793 if (pIn->memoryBudget > 1.0)
3794 {
3795 // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3796 // smaller-block type again in coming loop
3797 switch (minSizeBlk)
3798 {
3799 case AddrBlockThick64KB:
3800 allowedBlockSet.macroThin64KB = 0;
3801 case AddrBlockThin64KB:
3802 allowedBlockSet.macroThick4KB = 0;
3803 case AddrBlockThick4KB:
3804 allowedBlockSet.macroThin4KB = 0;
3805 case AddrBlockThin4KB:
3806 allowedBlockSet.micro = 0;
3807 case AddrBlockMicro:
3808 allowedBlockSet.linear = 0;
3809 case AddrBlockLinear:
3810 break;
3811
3812 default:
3813 ADDR_ASSERT_ALWAYS();
3814 break;
3815 }
3816
3817 for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3818 {
3819 if ((i != minSizeBlk) &&
3820 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3821 {
3822 if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3823 {
3824 // Clear the block type if the memory waste is unacceptable
3825 allowedBlockSet.value &= ~(1u << (i - 1));
3826 }
3827 }
3828 }
3829
3830 // Remove linear block type if 2 or more block types are allowed
3831 if (IsPow2(allowedBlockSet.value) == FALSE)
3832 {
3833 allowedBlockSet.linear = 0;
3834 }
3835
3836 // Select the biggest allowed block type
3837 minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3838
3839 if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3840 {
3841 minSizeBlk = AddrBlockLinear;
3842 }
3843 }
3844
3845 switch (minSizeBlk)
3846 {
3847 case AddrBlockLinear:
3848 allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3849 break;
3850
3851 case AddrBlockMicro:
3852 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3853 allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3854 break;
3855
3856 case AddrBlockThin4KB:
3857 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3858 Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3859 break;
3860
3861 case AddrBlockThick4KB:
3862 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3863 allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3864 break;
3865
3866 case AddrBlockThin64KB:
3867 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3868 Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3869 break;
3870
3871 case AddrBlockThick64KB:
3872 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3873 allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3874 break;
3875
3876 default:
3877 ADDR_ASSERT_ALWAYS();
3878 allowedSwModeSet.value = 0;
3879 break;
3880 }
3881 }
3882
3883 // Block type should be determined.
3884 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3885
3886 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3887
3888 // Determine swizzle type if there are 2 or more swizzle type candidates
3889 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3890 {
3891 if (ElemLib::IsBlockCompressed(pIn->format))
3892 {
3893 if (allowedSwSet.sw_D)
3894 {
3895 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3896 }
3897 else
3898 {
3899 ADDR_ASSERT(allowedSwSet.sw_S);
3900 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3901 }
3902 }
3903 else if (ElemLib::IsMacroPixelPacked(pIn->format))
3904 {
3905 if (allowedSwSet.sw_S)
3906 {
3907 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3908 }
3909 else if (allowedSwSet.sw_D)
3910 {
3911 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3912 }
3913 else
3914 {
3915 ADDR_ASSERT(allowedSwSet.sw_R);
3916 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3917 }
3918 }
3919 else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3920 {
3921 if (pIn->flags.color && allowedSwSet.sw_D)
3922 {
3923 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3924 }
3925 else if (allowedSwSet.sw_Z)
3926 {
3927 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3928 }
3929 else
3930 {
3931 ADDR_ASSERT(allowedSwSet.sw_S);
3932 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3933 }
3934 }
3935 else
3936 {
3937 if (pIn->flags.rotated && allowedSwSet.sw_R)
3938 {
3939 allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3940 }
3941 else if (allowedSwSet.sw_D)
3942 {
3943 allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3944 }
3945 else if (allowedSwSet.sw_S)
3946 {
3947 allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3948 }
3949 else
3950 {
3951 ADDR_ASSERT(allowedSwSet.sw_Z);
3952 allowedSwModeSet.value &= Gfx9ZSwModeMask;
3953 }
3954 }
3955
3956 // Swizzle type should be determined.
3957 ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3958 }
3959
3960 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3961 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3962 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3963 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3964 }
3965
3966 returnCode = ADDR_OK;
3967 }
3968 else
3969 {
3970 // Invalid combination...
3971 ADDR_ASSERT_ALWAYS();
3972 }
3973 }
3974 else
3975 {
3976 // Invalid combination...
3977 ADDR_ASSERT_ALWAYS();
3978 }
3979
3980 return returnCode;
3981 }
3982
3983 /**
3984 ************************************************************************************************************************
3985 * Gfx9Lib::ComputeStereoInfo
3986 *
3987 * @brief
3988 * Compute height alignment and right eye pipeBankXor for stereo surface
3989 *
3990 * @return
3991 * Error code
3992 *
3993 ************************************************************************************************************************
3994 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3995 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3996 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3997 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
3998 UINT_32* pHeightAlign
3999 ) const
4000 {
4001 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4002
4003 UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4004
4005 if (eqIndex < m_numEquations)
4006 {
4007 if (IsXor(pIn->swizzleMode))
4008 {
4009 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4010 const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
4011 const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2);
4012 const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
4013 const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4014 const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];
4015
4016 ADDR_ASSERT(maxYCoordBlock256 ==
4017 GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4018
4019 const UINT_32 maxYCoordInBaseEquation =
4020 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4021
4022 ADDR_ASSERT(maxYCoordInBaseEquation ==
4023 GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4024
4025 const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4026
4027 ADDR_ASSERT(maxYCoordInPipeXor ==
4028 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4029
4030 const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4031 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4032
4033 ADDR_ASSERT(maxYCoordInBankXor ==
4034 GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4035
4036 const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4037
4038 if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4039 {
4040 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4041
4042 if (pOut->pStereoInfo != NULL)
4043 {
4044 pOut->pStereoInfo->rightSwizzle = 0;
4045
4046 if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4047 {
4048 if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4049 {
4050 pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4051 }
4052
4053 if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4054 {
4055 pOut->pStereoInfo->rightSwizzle |=
4056 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4057 }
4058
4059 ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4060 GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4061 numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4062 }
4063 }
4064 }
4065 }
4066 }
4067 else
4068 {
4069 ADDR_ASSERT_ALWAYS();
4070 returnCode = ADDR_ERROR;
4071 }
4072
4073 return returnCode;
4074 }
4075
4076 /**
4077 ************************************************************************************************************************
4078 * Gfx9Lib::HwlComputeSurfaceInfoTiled
4079 *
4080 * @brief
4081 * Internal function to calculate alignment for tiled surface
4082 *
4083 * @return
4084 * ADDR_E_RETURNCODE
4085 ************************************************************************************************************************
4086 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4087 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4088 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4089 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4090 ) const
4091 {
4092 ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4093 &pOut->blockHeight,
4094 &pOut->blockSlices,
4095 pIn->bpp,
4096 pIn->numFrags,
4097 pIn->resourceType,
4098 pIn->swizzleMode);
4099
4100 if (returnCode == ADDR_OK)
4101 {
4102 UINT_32 pitchAlignInElement = pOut->blockWidth;
4103
4104 if ((IsTex2d(pIn->resourceType) == TRUE) &&
4105 (pIn->flags.display || pIn->flags.rotated) &&
4106 (pIn->numMipLevels <= 1) &&
4107 (pIn->numSamples <= 1) &&
4108 (pIn->numFrags <= 1))
4109 {
4110 // Display engine needs pitch align to be at least 32 pixels.
4111 pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4112 }
4113
4114 pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4115
4116 if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4117 {
4118 if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4119 {
4120 returnCode = ADDR_INVALIDPARAMS;
4121 }
4122 else if (pIn->pitchInElement < pOut->pitch)
4123 {
4124 returnCode = ADDR_INVALIDPARAMS;
4125 }
4126 else
4127 {
4128 pOut->pitch = pIn->pitchInElement;
4129 }
4130 }
4131
4132 UINT_32 heightAlign = 0;
4133
4134 if (pIn->flags.qbStereo)
4135 {
4136 returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4137 }
4138
4139 if (returnCode == ADDR_OK)
4140 {
4141 pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4142
4143 if (heightAlign > 1)
4144 {
4145 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4146 }
4147
4148 pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4149
4150 pOut->epitchIsHeight = FALSE;
4151 pOut->mipChainInTail = FALSE;
4152 pOut->firstMipIdInTail = pIn->numMipLevels;
4153
4154 pOut->mipChainPitch = pOut->pitch;
4155 pOut->mipChainHeight = pOut->height;
4156 pOut->mipChainSlice = pOut->numSlices;
4157
4158 if (pIn->numMipLevels > 1)
4159 {
4160 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4161 pIn->swizzleMode,
4162 pIn->bpp,
4163 pIn->width,
4164 pIn->height,
4165 pIn->numSlices,
4166 pOut->blockWidth,
4167 pOut->blockHeight,
4168 pOut->blockSlices,
4169 pIn->numMipLevels,
4170 pOut->pMipInfo);
4171
4172 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4173
4174 if (endingMipId == 0)
4175 {
4176 const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4177 pIn->swizzleMode,
4178 pOut->blockWidth,
4179 pOut->blockHeight,
4180 pOut->blockSlices);
4181
4182 pOut->epitchIsHeight = TRUE;
4183 pOut->pitch = tailMaxDim.w;
4184 pOut->height = tailMaxDim.h;
4185 pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4186 tailMaxDim.d : pIn->numSlices;
4187 pOut->mipChainInTail = TRUE;
4188 }
4189 else
4190 {
4191 UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth;
4192 UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4193
4194 AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4195 pIn->swizzleMode,
4196 mip0WidthInBlk,
4197 mip0HeightInBlk,
4198 pOut->numSlices / pOut->blockSlices);
4199 if (majorMode == ADDR_MAJOR_Y)
4200 {
4201 UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4202
4203 if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4204 {
4205 mip1WidthInBlk++;
4206 }
4207
4208 pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4209
4210 pOut->epitchIsHeight = FALSE;
4211 }
4212 else
4213 {
4214 UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4215
4216 if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4217 {
4218 mip1HeightInBlk++;
4219 }
4220
4221 pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4222
4223 pOut->epitchIsHeight = TRUE;
4224 }
4225 }
4226
4227 if (pOut->pMipInfo != NULL)
4228 {
4229 UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4230
4231 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4232 {
4233 Dim3d mipStartPos = {0};
4234 UINT_32 mipTailOffsetInBytes = 0;
4235
4236 mipStartPos = GetMipStartPos(pIn->resourceType,
4237 pIn->swizzleMode,
4238 pOut->pitch,
4239 pOut->height,
4240 pOut->numSlices,
4241 pOut->blockWidth,
4242 pOut->blockHeight,
4243 pOut->blockSlices,
4244 i,
4245 elementBytesLog2,
4246 &mipTailOffsetInBytes);
4247
4248 UINT_32 pitchInBlock =
4249 pOut->mipChainPitch / pOut->blockWidth;
4250 UINT_32 sliceInBlock =
4251 (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4252 UINT_64 blockIndex =
4253 mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4254 UINT_64 macroBlockOffset =
4255 blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4256
4257 pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4258 pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes;
4259 }
4260 }
4261 }
4262 else if (pOut->pMipInfo != NULL)
4263 {
4264 pOut->pMipInfo[0].pitch = pOut->pitch;
4265 pOut->pMipInfo[0].height = pOut->height;
4266 pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4267 pOut->pMipInfo[0].offset = 0;
4268 }
4269
4270 pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4271 (pIn->bpp >> 3) * pIn->numFrags;
4272 pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice;
4273 pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4274
4275 if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4276 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4277 (pIn->flags.texture == TRUE) &&
4278 (pIn->flags.noMetadata == FALSE) &&
4279 (pIn->flags.metaPipeUnaligned == FALSE))
4280 {
4281 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4282 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4283 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4284 // them, which may cause invalid metadata to be fetched.
4285 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4286 }
4287
4288 if (pIn->flags.prt)
4289 {
4290 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4291 }
4292 }
4293 }
4294
4295 return returnCode;
4296 }
4297
4298 /**
4299 ************************************************************************************************************************
4300 * Gfx9Lib::HwlComputeSurfaceInfoLinear
4301 *
4302 * @brief
4303 * Internal function to calculate alignment for linear surface
4304 *
4305 * @return
4306 * ADDR_E_RETURNCODE
4307 ************************************************************************************************************************
4308 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4309 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4310 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4311 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4312 ) const
4313 {
4314 ADDR_E_RETURNCODE returnCode = ADDR_OK;
4315 UINT_32 pitch = 0;
4316 UINT_32 actualHeight = 0;
4317 UINT_32 elementBytes = pIn->bpp >> 3;
4318 const UINT_32 alignment = pIn->flags.prt ? PrtAlignment : 256;
4319
4320 if (IsTex1d(pIn->resourceType))
4321 {
4322 if (pIn->height > 1)
4323 {
4324 returnCode = ADDR_INVALIDPARAMS;
4325 }
4326 else
4327 {
4328 const UINT_32 pitchAlignInElement = alignment / elementBytes;
4329
4330 pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4331 actualHeight = pIn->numMipLevels;
4332
4333 if (pIn->flags.prt == FALSE)
4334 {
4335 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4336 &pitch, &actualHeight);
4337 }
4338
4339 if (returnCode == ADDR_OK)
4340 {
4341 if (pOut->pMipInfo != NULL)
4342 {
4343 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4344 {
4345 pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4346 pOut->pMipInfo[i].pitch = pitch;
4347 pOut->pMipInfo[i].height = 1;
4348 pOut->pMipInfo[i].depth = 1;
4349 }
4350 }
4351 }
4352 }
4353 }
4354 else
4355 {
4356 returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4357 }
4358
4359 if ((pitch == 0) || (actualHeight == 0))
4360 {
4361 returnCode = ADDR_INVALIDPARAMS;
4362 }
4363
4364 if (returnCode == ADDR_OK)
4365 {
4366 pOut->pitch = pitch;
4367 pOut->height = pIn->height;
4368 pOut->numSlices = pIn->numSlices;
4369 pOut->mipChainPitch = pitch;
4370 pOut->mipChainHeight = actualHeight;
4371 pOut->mipChainSlice = pOut->numSlices;
4372 pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4373 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4374 pOut->surfSize = pOut->sliceSize * pOut->numSlices;
4375 pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4376 pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4377 pOut->blockHeight = 1;
4378 pOut->blockSlices = 1;
4379 }
4380
4381 // Post calculation validate
4382 ADDR_ASSERT(pOut->sliceSize > 0);
4383
4384 return returnCode;
4385 }
4386
4387 /**
4388 ************************************************************************************************************************
4389 * Gfx9Lib::GetMipChainInfo
4390 *
4391 * @brief
4392 * Internal function to get out information about mip chain
4393 *
4394 * @return
4395 * Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4396 ************************************************************************************************************************
4397 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4398 UINT_32 Gfx9Lib::GetMipChainInfo(
4399 AddrResourceType resourceType,
4400 AddrSwizzleMode swizzleMode,
4401 UINT_32 bpp,
4402 UINT_32 mip0Width,
4403 UINT_32 mip0Height,
4404 UINT_32 mip0Depth,
4405 UINT_32 blockWidth,
4406 UINT_32 blockHeight,
4407 UINT_32 blockDepth,
4408 UINT_32 numMipLevel,
4409 ADDR2_MIP_INFO* pMipInfo) const
4410 {
4411 const Dim3d tailMaxDim =
4412 GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4413
4414 UINT_32 mipPitch = mip0Width;
4415 UINT_32 mipHeight = mip0Height;
4416 UINT_32 mipDepth = IsTex3d(resourceType) ? mip0Depth : 1;
4417 UINT_32 offset = 0;
4418 UINT_32 firstMipIdInTail = numMipLevel;
4419 BOOL_32 inTail = FALSE;
4420 BOOL_32 finalDim = FALSE;
4421 BOOL_32 is3dThick = IsThick(resourceType, swizzleMode);
4422 BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE);
4423
4424 for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4425 {
4426 if (inTail)
4427 {
4428 if (finalDim == FALSE)
4429 {
4430 UINT_32 mipSize;
4431
4432 if (is3dThick)
4433 {
4434 mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4435 }
4436 else
4437 {
4438 mipSize = mipPitch * mipHeight * (bpp >> 3);
4439 }
4440
4441 if (mipSize <= 256)
4442 {
4443 UINT_32 index = Log2(bpp >> 3);
4444
4445 if (is3dThick)
4446 {
4447 mipPitch = Block256_3dZ[index].w;
4448 mipHeight = Block256_3dZ[index].h;
4449 mipDepth = Block256_3dZ[index].d;
4450 }
4451 else
4452 {
4453 mipPitch = Block256_2d[index].w;
4454 mipHeight = Block256_2d[index].h;
4455 }
4456
4457 finalDim = TRUE;
4458 }
4459 }
4460 }
4461 else
4462 {
4463 inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4464 mipPitch, mipHeight, mipDepth);
4465
4466 if (inTail)
4467 {
4468 firstMipIdInTail = mipId;
4469 mipPitch = tailMaxDim.w;
4470 mipHeight = tailMaxDim.h;
4471
4472 if (is3dThick)
4473 {
4474 mipDepth = tailMaxDim.d;
4475 }
4476 }
4477 else
4478 {
4479 mipPitch = PowTwoAlign(mipPitch, blockWidth);
4480 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4481
4482 if (is3dThick)
4483 {
4484 mipDepth = PowTwoAlign(mipDepth, blockDepth);
4485 }
4486 }
4487 }
4488
4489 if (pMipInfo != NULL)
4490 {
4491 pMipInfo[mipId].pitch = mipPitch;
4492 pMipInfo[mipId].height = mipHeight;
4493 pMipInfo[mipId].depth = mipDepth;
4494 pMipInfo[mipId].offset = offset;
4495 }
4496
4497 offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4498
4499 if (finalDim)
4500 {
4501 if (is3dThin)
4502 {
4503 mipDepth = Max(mipDepth >> 1, 1u);
4504 }
4505 }
4506 else
4507 {
4508 mipPitch = Max(mipPitch >> 1, 1u);
4509 mipHeight = Max(mipHeight >> 1, 1u);
4510
4511 if (is3dThick || is3dThin)
4512 {
4513 mipDepth = Max(mipDepth >> 1, 1u);
4514 }
4515 }
4516 }
4517
4518 return firstMipIdInTail;
4519 }
4520
4521 /**
4522 ************************************************************************************************************************
4523 * Gfx9Lib::GetMetaMiptailInfo
4524 *
4525 * @brief
4526 * Get mip tail coordinate information.
4527 *
4528 * @return
4529 * N/A
4530 ************************************************************************************************************************
4531 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4532 VOID Gfx9Lib::GetMetaMiptailInfo(
4533 ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord
4534 Dim3d mipCoord, ///< [in] mip tail base coord
4535 UINT_32 numMipInTail, ///< [in] number of mips in tail
4536 Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth
4537 ) const
4538 {
4539 BOOL_32 isThick = (pMetaBlkDim->d > 1);
4540 UINT_32 mipWidth = pMetaBlkDim->w;
4541 UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4542 UINT_32 mipDepth = pMetaBlkDim->d;
4543 UINT_32 minInc;
4544
4545 if (isThick)
4546 {
4547 minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4548 }
4549 else if (pMetaBlkDim->h >= 1024)
4550 {
4551 minInc = 256;
4552 }
4553 else if (pMetaBlkDim->h == 512)
4554 {
4555 minInc = 128;
4556 }
4557 else
4558 {
4559 minInc = 64;
4560 }
4561
4562 UINT_32 blk32MipId = 0xFFFFFFFF;
4563
4564 for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4565 {
4566 pInfo[mip].inMiptail = TRUE;
4567 pInfo[mip].startX = mipCoord.w;
4568 pInfo[mip].startY = mipCoord.h;
4569 pInfo[mip].startZ = mipCoord.d;
4570 pInfo[mip].width = mipWidth;
4571 pInfo[mip].height = mipHeight;
4572 pInfo[mip].depth = mipDepth;
4573
4574 if (mipWidth <= 32)
4575 {
4576 if (blk32MipId == 0xFFFFFFFF)
4577 {
4578 blk32MipId = mip;
4579 }
4580
4581 mipCoord.w = pInfo[blk32MipId].startX;
4582 mipCoord.h = pInfo[blk32MipId].startY;
4583 mipCoord.d = pInfo[blk32MipId].startZ;
4584
4585 switch (mip - blk32MipId)
4586 {
4587 case 0:
4588 mipCoord.w += 32; // 16x16
4589 break;
4590 case 1:
4591 mipCoord.h += 32; // 8x8
4592 break;
4593 case 2:
4594 mipCoord.h += 32; // 4x4
4595 mipCoord.w += 16;
4596 break;
4597 case 3:
4598 mipCoord.h += 32; // 2x2
4599 mipCoord.w += 32;
4600 break;
4601 case 4:
4602 mipCoord.h += 32; // 1x1
4603 mipCoord.w += 48;
4604 break;
4605 // The following are for BC/ASTC formats
4606 case 5:
4607 mipCoord.h += 48; // 1/2 x 1/2
4608 break;
4609 case 6:
4610 mipCoord.h += 48; // 1/4 x 1/4
4611 mipCoord.w += 16;
4612 break;
4613 case 7:
4614 mipCoord.h += 48; // 1/8 x 1/8
4615 mipCoord.w += 32;
4616 break;
4617 case 8:
4618 mipCoord.h += 48; // 1/16 x 1/16
4619 mipCoord.w += 48;
4620 break;
4621 default:
4622 ADDR_ASSERT_ALWAYS();
4623 break;
4624 }
4625
4626 mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4627 mipHeight = mipWidth;
4628
4629 if (isThick)
4630 {
4631 mipDepth = mipWidth;
4632 }
4633 }
4634 else
4635 {
4636 if (mipWidth <= minInc)
4637 {
4638 // if we're below the minimal increment...
4639 if (isThick)
4640 {
4641 // For 3d, just go in z direction
4642 mipCoord.d += mipDepth;
4643 }
4644 else
4645 {
4646 // For 2d, first go across, then down
4647 if ((mipWidth * 2) == minInc)
4648 {
4649 // if we're 2 mips below, that's when we go back in x, and down in y
4650 mipCoord.w -= minInc;
4651 mipCoord.h += minInc;
4652 }
4653 else
4654 {
4655 // otherwise, just go across in x
4656 mipCoord.w += minInc;
4657 }
4658 }
4659 }
4660 else
4661 {
4662 // On even mip, go down, otherwise, go across
4663 if (mip & 1)
4664 {
4665 mipCoord.w += mipWidth;
4666 }
4667 else
4668 {
4669 mipCoord.h += mipHeight;
4670 }
4671 }
4672 // Divide the width by 2
4673 mipWidth >>= 1;
4674 // After the first mip in tail, the mip is always a square
4675 mipHeight = mipWidth;
4676 // ...or for 3d, a cube
4677 if (isThick)
4678 {
4679 mipDepth = mipWidth;
4680 }
4681 }
4682 }
4683 }
4684
4685 /**
4686 ************************************************************************************************************************
4687 * Gfx9Lib::GetMipStartPos
4688 *
4689 * @brief
4690 * Internal function to get out information about mip logical start position
4691 *
4692 * @return
4693 * logical start position in macro block width/heith/depth of one mip level within one slice
4694 ************************************************************************************************************************
4695 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4696 Dim3d Gfx9Lib::GetMipStartPos(
4697 AddrResourceType resourceType,
4698 AddrSwizzleMode swizzleMode,
4699 UINT_32 width,
4700 UINT_32 height,
4701 UINT_32 depth,
4702 UINT_32 blockWidth,
4703 UINT_32 blockHeight,
4704 UINT_32 blockDepth,
4705 UINT_32 mipId,
4706 UINT_32 log2ElementBytes,
4707 UINT_32* pMipTailBytesOffset) const
4708 {
4709 Dim3d mipStartPos = {0};
4710 const Dim3d tailMaxDim = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4711
4712 // Report mip in tail if Mip0 is already in mip tail
4713 BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4714 UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
4715 UINT_32 mipIndexInTail = mipId;
4716
4717 if (inMipTail == FALSE)
4718 {
4719 // Mip 0 dimension, unit in block
4720 UINT_32 mipWidthInBlk = width / blockWidth;
4721 UINT_32 mipHeightInBlk = height / blockHeight;
4722 UINT_32 mipDepthInBlk = depth / blockDepth;
4723 AddrMajorMode majorMode = GetMajorMode(resourceType,
4724 swizzleMode,
4725 mipWidthInBlk,
4726 mipHeightInBlk,
4727 mipDepthInBlk);
4728
4729 UINT_32 endingMip = mipId + 1;
4730
4731 for (UINT_32 i = 1; i <= mipId; i++)
4732 {
4733 if ((i == 1) || (i == 3))
4734 {
4735 if (majorMode == ADDR_MAJOR_Y)
4736 {
4737 mipStartPos.w += mipWidthInBlk;
4738 }
4739 else
4740 {
4741 mipStartPos.h += mipHeightInBlk;
4742 }
4743 }
4744 else
4745 {
4746 if (majorMode == ADDR_MAJOR_X)
4747 {
4748 mipStartPos.w += mipWidthInBlk;
4749 }
4750 else if (majorMode == ADDR_MAJOR_Y)
4751 {
4752 mipStartPos.h += mipHeightInBlk;
4753 }
4754 else
4755 {
4756 mipStartPos.d += mipDepthInBlk;
4757 }
4758 }
4759
4760 BOOL_32 inTail = FALSE;
4761
4762 if (IsThick(resourceType, swizzleMode))
4763 {
4764 UINT_32 dim = log2BlkSize % 3;
4765
4766 if (dim == 0)
4767 {
4768 inTail =
4769 (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4770 }
4771 else if (dim == 1)
4772 {
4773 inTail =
4774 (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4775 }
4776 else
4777 {
4778 inTail =
4779 (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4780 }
4781 }
4782 else
4783 {
4784 if (log2BlkSize & 1)
4785 {
4786 inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4787 }
4788 else
4789 {
4790 inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4791 }
4792 }
4793
4794 if (inTail)
4795 {
4796 endingMip = i;
4797 break;
4798 }
4799
4800 mipWidthInBlk = RoundHalf(mipWidthInBlk);
4801 mipHeightInBlk = RoundHalf(mipHeightInBlk);
4802 mipDepthInBlk = RoundHalf(mipDepthInBlk);
4803 }
4804
4805 if (mipId >= endingMip)
4806 {
4807 inMipTail = TRUE;
4808 mipIndexInTail = mipId - endingMip;
4809 }
4810 }
4811
4812 if (inMipTail)
4813 {
4814 UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4815 ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4816 *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4817 }
4818
4819 return mipStartPos;
4820 }
4821
4822 /**
4823 ************************************************************************************************************************
4824 * Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4825 *
4826 * @brief
4827 * Internal function to calculate address from coord for tiled swizzle surface
4828 *
4829 * @return
4830 * ADDR_E_RETURNCODE
4831 ************************************************************************************************************************
4832 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4833 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4834 const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4835 ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4836 ) const
4837 {
4838 ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4839 localIn.swizzleMode = pIn->swizzleMode;
4840 localIn.flags = pIn->flags;
4841 localIn.resourceType = pIn->resourceType;
4842 localIn.bpp = pIn->bpp;
4843 localIn.width = Max(pIn->unalignedWidth, 1u);
4844 localIn.height = Max(pIn->unalignedHeight, 1u);
4845 localIn.numSlices = Max(pIn->numSlices, 1u);
4846 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4847 localIn.numSamples = Max(pIn->numSamples, 1u);
4848 localIn.numFrags = Max(pIn->numFrags, 1u);
4849 if (localIn.numMipLevels <= 1)
4850 {
4851 localIn.pitchInElement = pIn->pitchInElement;
4852 }
4853
4854 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4855 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4856
4857 BOOL_32 valid = (returnCode == ADDR_OK) &&
4858 (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4859 IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4860 ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4861
4862 if (valid)
4863 {
4864 UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3);
4865 Dim3d mipStartPos = {0};
4866 UINT_32 mipTailBytesOffset = 0;
4867
4868 if (pIn->numMipLevels > 1)
4869 {
4870 // Mip-map chain cannot be MSAA surface
4871 ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4872
4873 mipStartPos = GetMipStartPos(pIn->resourceType,
4874 pIn->swizzleMode,
4875 localOut.pitch,
4876 localOut.height,
4877 localOut.numSlices,
4878 localOut.blockWidth,
4879 localOut.blockHeight,
4880 localOut.blockSlices,
4881 pIn->mipId,
4882 log2ElementBytes,
4883 &mipTailBytesOffset);
4884 }
4885
4886 UINT_32 interleaveOffset = 0;
4887 UINT_32 pipeBits = 0;
4888 UINT_32 pipeXor = 0;
4889 UINT_32 bankBits = 0;
4890 UINT_32 bankXor = 0;
4891
4892 if (IsThin(pIn->resourceType, pIn->swizzleMode))
4893 {
4894 UINT_32 blockOffset = 0;
4895 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4896
4897 if (IsZOrderSwizzle(pIn->swizzleMode))
4898 {
4899 // Morton generation
4900 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4901 {
4902 UINT_32 totalLowBits = 6 - log2ElementBytes;
4903 UINT_32 mortBits = totalLowBits / 2;
4904 UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4905 // Are 9 bits enough?
4906 UINT_32 highBitsValue =
4907 MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4908 blockOffset = lowBitsValue | highBitsValue;
4909 ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4910 }
4911 else
4912 {
4913 blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4914 }
4915
4916 // Fill LSBs with sample bits
4917 if (pIn->numSamples > 1)
4918 {
4919 blockOffset *= pIn->numSamples;
4920 blockOffset |= pIn->sample;
4921 }
4922
4923 // Shift according to BytesPP
4924 blockOffset <<= log2ElementBytes;
4925 }
4926 else
4927 {
4928 // Micro block offset
4929 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4930 blockOffset = microBlockOffset;
4931
4932 // Micro block dimension
4933 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4934 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4935 // Morton generation, does 12 bit enough?
4936 blockOffset |=
4937 MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4938
4939 // Sample bits start location
4940 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4941 // Join sample bits information to the highest Macro block bits
4942 if (IsNonPrtXor(pIn->swizzleMode))
4943 {
4944 // Non-prt-Xor : xor highest Macro block bits with sample bits
4945 blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4946 }
4947 else
4948 {
4949 // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4950 // after this op, the blockOffset only contains log2 Macro block size bits
4951 blockOffset %= (1 << sampleStart);
4952 blockOffset |= (pIn->sample << sampleStart);
4953 ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4954 }
4955 }
4956
4957 if (IsXor(pIn->swizzleMode))
4958 {
4959 // Mask off bits above Macro block bits to keep page synonyms working for prt
4960 if (IsPrt(pIn->swizzleMode))
4961 {
4962 blockOffset &= ((1 << log2BlkSize) - 1);
4963 }
4964
4965 // Preserve offset inside pipe interleave
4966 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4967 blockOffset >>= m_pipeInterleaveLog2;
4968
4969 // Pipe/Se xor bits
4970 pipeBits = GetPipeXorBits(log2BlkSize);
4971 // Pipe xor
4972 pipeXor = FoldXor2d(blockOffset, pipeBits);
4973 blockOffset >>= pipeBits;
4974
4975 // Bank xor bits
4976 bankBits = GetBankXorBits(log2BlkSize);
4977 // Bank Xor
4978 bankXor = FoldXor2d(blockOffset, bankBits);
4979 blockOffset >>= bankBits;
4980
4981 // Put all the part back together
4982 blockOffset <<= bankBits;
4983 blockOffset |= bankXor;
4984 blockOffset <<= pipeBits;
4985 blockOffset |= pipeXor;
4986 blockOffset <<= m_pipeInterleaveLog2;
4987 blockOffset |= interleaveOffset;
4988 }
4989
4990 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4991 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4992
4993 blockOffset |= mipTailBytesOffset;
4994
4995 if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4996 {
4997 // Apply slice xor if not MSAA/PRT
4998 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4999 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5000 (m_pipeInterleaveLog2 + pipeBits));
5001 }
5002
5003 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5004 bankBits, pipeBits, &blockOffset);
5005
5006 blockOffset %= (1 << log2BlkSize);
5007
5008 UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5009 UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5010 UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5011 UINT_64 macroBlockIndex =
5012 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5013 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5014 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5015
5016 pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5017 }
5018 else
5019 {
5020 UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5021
5022 Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5023
5024 UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5025 (pIn->y / microBlockDim.h),
5026 (pIn->slice / microBlockDim.d),
5027 8);
5028
5029 blockOffset <<= 10;
5030 blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5031
5032 if (IsXor(pIn->swizzleMode))
5033 {
5034 // Mask off bits above Macro block bits to keep page synonyms working for prt
5035 if (IsPrt(pIn->swizzleMode))
5036 {
5037 blockOffset &= ((1 << log2BlkSize) - 1);
5038 }
5039
5040 // Preserve offset inside pipe interleave
5041 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5042 blockOffset >>= m_pipeInterleaveLog2;
5043
5044 // Pipe/Se xor bits
5045 pipeBits = GetPipeXorBits(log2BlkSize);
5046 // Pipe xor
5047 pipeXor = FoldXor3d(blockOffset, pipeBits);
5048 blockOffset >>= pipeBits;
5049
5050 // Bank xor bits
5051 bankBits = GetBankXorBits(log2BlkSize);
5052 // Bank Xor
5053 bankXor = FoldXor3d(blockOffset, bankBits);
5054 blockOffset >>= bankBits;
5055
5056 // Put all the part back together
5057 blockOffset <<= bankBits;
5058 blockOffset |= bankXor;
5059 blockOffset <<= pipeBits;
5060 blockOffset |= pipeXor;
5061 blockOffset <<= m_pipeInterleaveLog2;
5062 blockOffset |= interleaveOffset;
5063 }
5064
5065 ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5066 ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5067 blockOffset |= mipTailBytesOffset;
5068
5069 returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5070 bankBits, pipeBits, &blockOffset);
5071
5072 blockOffset %= (1 << log2BlkSize);
5073
5074 UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w;
5075 UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5076 UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5077
5078 UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5079 UINT_32 sliceSizeInBlock =
5080 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5081 UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5082
5083 pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5084 }
5085 }
5086 else
5087 {
5088 returnCode = ADDR_INVALIDPARAMS;
5089 }
5090
5091 return returnCode;
5092 }
5093
5094 /**
5095 ************************************************************************************************************************
5096 * Gfx9Lib::ComputeSurfaceInfoLinear
5097 *
5098 * @brief
5099 * Internal function to calculate padding for linear swizzle 2D/3D surface
5100 *
5101 * @return
5102 * N/A
5103 ************************************************************************************************************************
5104 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5105 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5106 const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture
5107 UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element
5108 UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW
5109 ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information
5110 ) const
5111 {
5112 ADDR_E_RETURNCODE returnCode = ADDR_OK;
5113
5114 UINT_32 elementBytes = pIn->bpp >> 3;
5115 UINT_32 pitchAlignInElement = 0;
5116
5117 if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5118 {
5119 ADDR_ASSERT(pIn->numMipLevels <= 1);
5120 ADDR_ASSERT(pIn->numSlices <= 1);
5121 pitchAlignInElement = 1;
5122 }
5123 else
5124 {
5125 pitchAlignInElement = (256 / elementBytes);
5126 }
5127
5128 UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement);
5129 UINT_32 slice0PaddedHeight = pIn->height;
5130
5131 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5132 &mipChainWidth, &slice0PaddedHeight);
5133
5134 if (returnCode == ADDR_OK)
5135 {
5136 UINT_32 mipChainHeight = 0;
5137 UINT_32 mipHeight = pIn->height;
5138 UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5139
5140 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5141 {
5142 if (pMipInfo != NULL)
5143 {
5144 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5145 pMipInfo[i].pitch = mipChainWidth;
5146 pMipInfo[i].height = mipHeight;
5147 pMipInfo[i].depth = mipDepth;
5148 }
5149
5150 mipChainHeight += mipHeight;
5151 mipHeight = RoundHalf(mipHeight);
5152 mipHeight = Max(mipHeight, 1u);
5153 }
5154
5155 *pMipmap0PaddedWidth = mipChainWidth;
5156 *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5157 }
5158
5159 return returnCode;
5160 }
5161
5162 /**
5163 ************************************************************************************************************************
5164 * Gfx9Lib::ComputeThinBlockDimension
5165 *
5166 * @brief
5167 * Internal function to get thin block width/height/depth in element from surface input params.
5168 *
5169 * @return
5170 * N/A
5171 ************************************************************************************************************************
5172 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5173 VOID Gfx9Lib::ComputeThinBlockDimension(
5174 UINT_32* pWidth,
5175 UINT_32* pHeight,
5176 UINT_32* pDepth,
5177 UINT_32 bpp,
5178 UINT_32 numSamples,
5179 AddrResourceType resourceType,
5180 AddrSwizzleMode swizzleMode) const
5181 {
5182 ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5183
5184 const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode);
5185 const UINT_32 eleBytes = bpp >> 3;
5186 const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5187 const UINT_32 log2blkSizeIn256B = log2BlkSize - 8;
5188 const UINT_32 widthAmp = log2blkSizeIn256B / 2;
5189 const UINT_32 heightAmp = log2blkSizeIn256B - widthAmp;
5190
5191 ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5192
5193 *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5194 *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5195 *pDepth = 1;
5196
5197 if (numSamples > 1)
5198 {
5199 const UINT_32 log2sample = Log2(numSamples);
5200 const UINT_32 q = log2sample >> 1;
5201 const UINT_32 r = log2sample & 1;
5202
5203 if (log2BlkSize & 1)
5204 {
5205 *pWidth >>= q;
5206 *pHeight >>= (q + r);
5207 }
5208 else
5209 {
5210 *pWidth >>= (q + r);
5211 *pHeight >>= q;
5212 }
5213 }
5214 }
5215
5216 } // V2
5217 } // Addr
5218