xref: /aosp_15_r20/external/mesa3d/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx9addrlib.cpp
12 * @brief Contains the implementation for the Gfx9Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx9addrlib.h"
17 
18 #include "gfx9_gb_reg.h"
19 
20 #include "amdgpu_asic_addr.h"
21 
22 ////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////
24 
25 namespace Addr
26 {
27 
28 /**
29 ************************************************************************************************************************
30 *   Gfx9HwlInit
31 *
32 *   @brief
33 *       Creates an Gfx9Lib object.
34 *
35 *   @return
36 *       Returns an Gfx9Lib object pointer.
37 ************************************************************************************************************************
38 */
Gfx9HwlInit(const Client * pClient)39 Addr::Lib* Gfx9HwlInit(const Client* pClient)
40 {
41     return V2::Gfx9Lib::CreateObj(pClient);
42 }
43 
44 namespace V2
45 {
46 
47 ////////////////////////////////////////////////////////////////////////////////////////////////////
48 //                               Static Const Member
49 ////////////////////////////////////////////////////////////////////////////////////////////////////
50 
51 const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
52 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
53     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
54     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
55     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
56     {{0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_256B_R
57 
58     {{0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_Z
59     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
60     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
61     {{0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_4KB_R
62 
63     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_Z
64     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
65     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
66     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0}}, // ADDR_SW_64KB_R
67 
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
71     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
72 
73     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_Z_T
74     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
75     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
76     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    1,    0,    0}}, // ADDR_SW_64KB_R_T
77 
78     {{0,    0,    1,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_Z_x
79     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_x
80     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_x
81     {{0,    0,    1,    0,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_4KB_R_x
82 
83     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
84     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
85     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
86     {{0,    0,    0,    1,    0,    0,    0,    0,    1,    1,    0,    0,    0}}, // ADDR_SW_64KB_R_X
87 
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
90     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
91     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
92     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
93 };
94 
95 const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 6, 5, 4, 3, 2, 1, 0};
96 
97 const Dim3d   Gfx9Lib::Block256_3dS[]  = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}};
98 
99 const Dim3d   Gfx9Lib::Block256_3dZ[]  = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
100 
101 /**
102 ************************************************************************************************************************
103 *   Gfx9Lib::Gfx9Lib
104 *
105 *   @brief
106 *       Constructor
107 *
108 ************************************************************************************************************************
109 */
Gfx9Lib(const Client * pClient)110 Gfx9Lib::Gfx9Lib(const Client* pClient)
111     :
112     Lib(pClient)
113 {
114     memset(&m_settings, 0, sizeof(m_settings));
115     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
116     memset(m_cachedMetaEqKey, 0, sizeof(m_cachedMetaEqKey));
117     m_metaEqOverrideIndex = 0;
118 }
119 
120 /**
121 ************************************************************************************************************************
122 *   Gfx9Lib::~Gfx9Lib
123 *
124 *   @brief
125 *       Destructor
126 ************************************************************************************************************************
127 */
~Gfx9Lib()128 Gfx9Lib::~Gfx9Lib()
129 {
130 }
131 
132 /**
133 ************************************************************************************************************************
134 *   Gfx9Lib::HwlComputeHtileInfo
135 *
136 *   @brief
137 *       Interface function stub of AddrComputeHtilenfo
138 *
139 *   @return
140 *       ADDR_E_RETURNCODE
141 ************************************************************************************************************************
142 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const143 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo(
144     const ADDR2_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
145     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
146     ) const
147 {
148     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned,
149                                                        pIn->swizzleMode);
150 
151     UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1;
152 
153     UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2;
154 
155     if ((numPipeTotal == 1) && (numRbTotal == 1))
156     {
157         numCompressBlkPerMetaBlkLog2 = 10;
158     }
159     else
160     {
161         if (m_settings.applyAliasFix)
162         {
163             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
164         }
165         else
166         {
167             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
168         }
169     }
170 
171     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
172 
173     Dim3d   metaBlkDim   = {8, 8, 1};
174     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
175     UINT_32 widthAmp     = (pIn->numMipLevels > 1) ? (totalAmpBits >> 1) : RoundHalf(totalAmpBits);
176     UINT_32 heightAmp    = totalAmpBits - widthAmp;
177     metaBlkDim.w <<= widthAmp;
178     metaBlkDim.h <<= heightAmp;
179 
180 #if DEBUG
181     Dim3d metaBlkDimDbg = {8, 8, 1};
182     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
183     {
184         if ((metaBlkDimDbg.h < metaBlkDimDbg.w) ||
185             ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w)))
186         {
187             metaBlkDimDbg.h <<= 1;
188         }
189         else
190         {
191             metaBlkDimDbg.w <<= 1;
192         }
193     }
194     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
195 #endif
196 
197     UINT_32 numMetaBlkX;
198     UINT_32 numMetaBlkY;
199     UINT_32 numMetaBlkZ;
200 
201     GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo,
202                    pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices,
203                    &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
204 
205     const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2;
206     UINT_32       align       = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
207 
208     if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2))
209     {
210         align *= (numPipeTotal >> 1);
211     }
212 
213     align = Max(align, metaBlkSize);
214 
215     if (m_settings.metaBaseAlignFix)
216     {
217         align = Max(align, GetBlockSize(pIn->swizzleMode));
218     }
219 
220     if (m_settings.htileAlignFix)
221     {
222         const INT_32 metaBlkSizeLog2        = numCompressBlkPerMetaBlkLog2 + 2;
223         const INT_32 htileCachelineSizeLog2 = 11;
224         const INT_32 maxNumOfRbMaskBits     = 1 + Log2(numPipeTotal) + Log2(numRbTotal);
225 
226         INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits));
227 
228         align <<= rbMaskPadding;
229     }
230 
231     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
232     pOut->height     = numMetaBlkY * metaBlkDim.h;
233     pOut->sliceSize  = numMetaBlkX * numMetaBlkY * metaBlkSize;
234 
235     pOut->metaBlkWidth       = metaBlkDim.w;
236     pOut->metaBlkHeight      = metaBlkDim.h;
237     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
238 
239     pOut->baseAlign  = align;
240     pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align);
241 
242     return ADDR_OK;
243 }
244 
245 /**
246 ************************************************************************************************************************
247 *   Gfx9Lib::HwlComputeCmaskInfo
248 *
249 *   @brief
250 *       Interface function stub of AddrComputeCmaskInfo
251 *
252 *   @return
253 *       ADDR_E_RETURNCODE
254 ************************************************************************************************************************
255 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const256 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo(
257     const ADDR2_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
258     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
259     ) const
260 {
261     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
262 
263     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
264                                                        pIn->swizzleMode);
265 
266     UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1;
267 
268     UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk;
269 
270     if ((numPipeTotal == 1) && (numRbTotal == 1))
271     {
272         numCompressBlkPerMetaBlkLog2 = 13;
273     }
274     else
275     {
276         if (m_settings.applyAliasFix)
277         {
278             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2);
279         }
280         else
281         {
282             numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10;
283         }
284 
285         numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u);
286     }
287 
288     numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2;
289 
290     Dim2d metaBlkDim = {8, 8};
291     UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2;
292     UINT_32 heightAmp = totalAmpBits >> 1;
293     UINT_32 widthAmp = totalAmpBits - heightAmp;
294     metaBlkDim.w <<= widthAmp;
295     metaBlkDim.h <<= heightAmp;
296 
297 #if DEBUG
298     Dim2d metaBlkDimDbg = {8, 8};
299     for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++)
300     {
301         if (metaBlkDimDbg.h < metaBlkDimDbg.w)
302         {
303             metaBlkDimDbg.h <<= 1;
304         }
305         else
306         {
307             metaBlkDimDbg.w <<= 1;
308         }
309     }
310     ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h));
311 #endif
312 
313     UINT_32 numMetaBlkX = (pIn->unalignedWidth  + metaBlkDim.w - 1) / metaBlkDim.w;
314     UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h;
315     UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u);
316 
317     UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
318 
319     if (m_settings.metaBaseAlignFix)
320     {
321         sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
322     }
323 
324     pOut->pitch      = numMetaBlkX * metaBlkDim.w;
325     pOut->height     = numMetaBlkY * metaBlkDim.h;
326     pOut->sliceSize  = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1;
327     pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign);
328     pOut->baseAlign  = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign);
329 
330     pOut->metaBlkWidth = metaBlkDim.w;
331     pOut->metaBlkHeight = metaBlkDim.h;
332 
333     pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
334 
335     // Get the CMASK address equation (copied from CmaskAddrFromCoord)
336     UINT_32 fmaskBpp              = GetFmaskBpp(1, 1);
337     UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
338     UINT_32 metaBlkWidthLog2      = Log2(pOut->metaBlkWidth);
339     UINT_32 metaBlkHeightLog2     = Log2(pOut->metaBlkHeight);
340 
341     MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
342                                 Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
343                                 metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
344 
345     CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
346 
347     // Generate the CMASK address equation.
348     pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
349     bool checked = false;
350     for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
351        CoordTerm &bit = (*eq)[b];
352 
353        unsigned c;
354        for (c = 0; c < bit.getsize(); c++) {
355           Coordinate &coord = bit[c];
356           pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
357           pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
358        }
359        for (; c < 5; c++)
360           pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
361     }
362 
363     // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
364     for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
365        CoordTerm &prev = (*eq)[b - 1];
366        CoordTerm &cur = (*eq)[b];
367 
368        if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
369           prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
370           prev[0].getord() + 1 == cur[0].getord())
371           pOut->equation.gfx9.num_bits = b;
372        else
373           break;
374     }
375 
376     pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
377                                                                    pIn->swizzleMode);
378 
379     return ADDR_OK;
380 }
381 
382 /**
383 ************************************************************************************************************************
384 *   Gfx9Lib::GetMetaMipInfo
385 *
386 *   @brief
387 *       Get meta mip info
388 *
389 *   @return
390 *       N/A
391 ************************************************************************************************************************
392 */
GetMetaMipInfo(UINT_32 numMipLevels,Dim3d * pMetaBlkDim,BOOL_32 dataThick,ADDR2_META_MIP_INFO * pInfo,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 * pNumMetaBlkX,UINT_32 * pNumMetaBlkY,UINT_32 * pNumMetaBlkZ) const393 VOID Gfx9Lib::GetMetaMipInfo(
394     UINT_32 numMipLevels,           ///< [in]  number of mip levels
395     Dim3d* pMetaBlkDim,             ///< [in]  meta block dimension
396     BOOL_32 dataThick,              ///< [in]  data surface is thick
397     ADDR2_META_MIP_INFO* pInfo,     ///< [out] meta mip info
398     UINT_32 mip0Width,              ///< [in]  mip0 width
399     UINT_32 mip0Height,             ///< [in]  mip0 height
400     UINT_32 mip0Depth,              ///< [in]  mip0 depth
401     UINT_32* pNumMetaBlkX,          ///< [out] number of metablock X in mipchain
402     UINT_32* pNumMetaBlkY,          ///< [out] number of metablock Y in mipchain
403     UINT_32* pNumMetaBlkZ)          ///< [out] number of metablock Z in mipchain
404     const
405 {
406     UINT_32 numMetaBlkX = (mip0Width  + pMetaBlkDim->w - 1) / pMetaBlkDim->w;
407     UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h;
408     UINT_32 numMetaBlkZ = (mip0Depth  + pMetaBlkDim->d - 1) / pMetaBlkDim->d;
409     UINT_32 tailWidth   = pMetaBlkDim->w;
410     UINT_32 tailHeight  = pMetaBlkDim->h >> 1;
411     UINT_32 tailDepth   = pMetaBlkDim->d;
412     BOOL_32 inTail      = FALSE;
413     AddrMajorMode major = ADDR_MAJOR_MAX_TYPE;
414 
415     if (numMipLevels > 1)
416     {
417         if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY))
418         {
419             // Z major
420             major = ADDR_MAJOR_Z;
421         }
422         else if (numMetaBlkX >= numMetaBlkY)
423         {
424             // X major
425             major = ADDR_MAJOR_X;
426         }
427         else
428         {
429             // Y major
430             major = ADDR_MAJOR_Y;
431         }
432 
433         inTail = ((mip0Width <= tailWidth) &&
434                   (mip0Height <= tailHeight) &&
435                   ((dataThick == FALSE) || (mip0Depth <= tailDepth)));
436 
437         if (inTail == FALSE)
438         {
439             UINT_32 orderLimit;
440             UINT_32 *pMipDim;
441             UINT_32 *pOrderDim;
442 
443             if (major == ADDR_MAJOR_Z)
444             {
445                 // Z major
446                 pMipDim = &numMetaBlkY;
447                 pOrderDim = &numMetaBlkZ;
448                 orderLimit = 4;
449             }
450             else if (major == ADDR_MAJOR_X)
451             {
452                 // X major
453                 pMipDim = &numMetaBlkY;
454                 pOrderDim = &numMetaBlkX;
455                 orderLimit = 4;
456             }
457             else
458             {
459                 // Y major
460                 pMipDim = &numMetaBlkX;
461                 pOrderDim = &numMetaBlkY;
462                 orderLimit = 2;
463             }
464 
465             if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3))
466             {
467                 *pMipDim += 2;
468             }
469             else
470             {
471                 *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1));
472             }
473         }
474     }
475 
476     if (pInfo != NULL)
477     {
478         UINT_32 mipWidth  = mip0Width;
479         UINT_32 mipHeight = mip0Height;
480         UINT_32 mipDepth  = mip0Depth;
481         Dim3d   mipCoord  = {0};
482 
483         for (UINT_32 mip = 0; mip < numMipLevels; mip++)
484         {
485             if (inTail)
486             {
487                 GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip,
488                                    pMetaBlkDim);
489                 break;
490             }
491             else
492             {
493                 mipWidth  = PowTwoAlign(mipWidth, pMetaBlkDim->w);
494                 mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h);
495                 mipDepth  = PowTwoAlign(mipDepth, pMetaBlkDim->d);
496 
497                 pInfo[mip].inMiptail = FALSE;
498                 pInfo[mip].startX = mipCoord.w;
499                 pInfo[mip].startY = mipCoord.h;
500                 pInfo[mip].startZ = mipCoord.d;
501                 pInfo[mip].width  = mipWidth;
502                 pInfo[mip].height = mipHeight;
503                 pInfo[mip].depth  = dataThick ? mipDepth : 1;
504 
505                 if ((mip >= 3) || (mip & 1))
506                 {
507                     switch (major)
508                     {
509                         case ADDR_MAJOR_X:
510                             mipCoord.w += mipWidth;
511                             break;
512                         case ADDR_MAJOR_Y:
513                             mipCoord.h += mipHeight;
514                             break;
515                         case ADDR_MAJOR_Z:
516                             mipCoord.d += mipDepth;
517                             break;
518                         default:
519                             break;
520                     }
521                 }
522                 else
523                 {
524                     switch (major)
525                     {
526                         case ADDR_MAJOR_X:
527                             mipCoord.h += mipHeight;
528                             break;
529                         case ADDR_MAJOR_Y:
530                             mipCoord.w += mipWidth;
531                             break;
532                         case ADDR_MAJOR_Z:
533                             mipCoord.h += mipHeight;
534                             break;
535                         default:
536                             break;
537                     }
538                 }
539 
540                 mipWidth  = Max(mipWidth >> 1, 1u);
541                 mipHeight = Max(mipHeight >> 1, 1u);
542                 mipDepth = Max(mipDepth >> 1, 1u);
543 
544                 inTail = ((mipWidth <= tailWidth) &&
545                           (mipHeight <= tailHeight) &&
546                           ((dataThick == FALSE) || (mipDepth <= tailDepth)));
547             }
548         }
549     }
550 
551     *pNumMetaBlkX = numMetaBlkX;
552     *pNumMetaBlkY = numMetaBlkY;
553     *pNumMetaBlkZ = numMetaBlkZ;
554 }
555 
556 /**
557 ************************************************************************************************************************
558 *   Gfx9Lib::HwlComputeDccInfo
559 *
560 *   @brief
561 *       Interface function to compute DCC key info
562 *
563 *   @return
564 *       ADDR_E_RETURNCODE
565 ************************************************************************************************************************
566 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const567 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo(
568     const ADDR2_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
569     ADDR2_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
570     ) const
571 {
572     BOOL_32 dataLinear = IsLinear(pIn->swizzleMode);
573     BOOL_32 metaLinear = pIn->dccKeyFlags.linear;
574     BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned;
575 
576     if (dataLinear)
577     {
578         metaLinear = TRUE;
579     }
580     else if (metaLinear == TRUE)
581     {
582         pipeAligned = FALSE;
583     }
584 
585     UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode);
586 
587     if (metaLinear)
588     {
589         // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9.
590         ADDR_ASSERT_ALWAYS();
591 
592         pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes;
593         pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign);
594     }
595     else
596     {
597         BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode);
598 
599         UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096;
600 
601         UINT_32 numFrags = Max(pIn->numFrags, 1u);
602         UINT_32 numSlices = Max(pIn->numSlices, 1u);
603 
604         minMetaBlkSize /= numFrags;
605 
606         UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize;
607 
608         UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1;
609 
610         if ((numPipeTotal > 1) || (numRbTotal > 1))
611         {
612             const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10);
613 
614             numCompressBlkPerMetaBlk =
615                 Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize));
616 
617             if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp)
618             {
619                 numCompressBlkPerMetaBlk = 65536 * pIn->bpp;
620             }
621         }
622 
623         Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp);
624         Dim3d metaBlkDim = compressBlkDim;
625 
626         for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1)
627         {
628             if ((metaBlkDim.h < metaBlkDim.w) ||
629                 ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w)))
630             {
631                 if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d))
632                 {
633                     metaBlkDim.h <<= 1;
634                 }
635                 else
636                 {
637                     metaBlkDim.d <<= 1;
638                 }
639             }
640             else
641             {
642                 if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d))
643                 {
644                     metaBlkDim.w <<= 1;
645                 }
646                 else
647                 {
648                     metaBlkDim.d <<= 1;
649                 }
650             }
651         }
652 
653         UINT_32 numMetaBlkX;
654         UINT_32 numMetaBlkY;
655         UINT_32 numMetaBlkZ;
656 
657         GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo,
658                        pIn->unalignedWidth, pIn->unalignedHeight, numSlices,
659                        &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ);
660 
661         UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes;
662 
663         if (numFrags > m_maxCompFrag)
664         {
665             sizeAlign *= (numFrags / m_maxCompFrag);
666         }
667 
668         if (m_settings.metaBaseAlignFix)
669         {
670             sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode));
671         }
672 
673         pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ *
674                            numCompressBlkPerMetaBlk * numFrags;
675         pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign);
676         pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign);
677 
678         pOut->pitch = numMetaBlkX * metaBlkDim.w;
679         pOut->height = numMetaBlkY * metaBlkDim.h;
680         pOut->depth = numMetaBlkZ * metaBlkDim.d;
681 
682         pOut->compressBlkWidth = compressBlkDim.w;
683         pOut->compressBlkHeight = compressBlkDim.h;
684         pOut->compressBlkDepth = compressBlkDim.d;
685 
686         pOut->metaBlkWidth = metaBlkDim.w;
687         pOut->metaBlkHeight = metaBlkDim.h;
688         pOut->metaBlkDepth = metaBlkDim.d;
689         pOut->metaBlkSize = numCompressBlkPerMetaBlk * numFrags;
690 
691         pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY;
692         pOut->fastClearSizePerSlice =
693             pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag);
694 
695         // Get the DCC address equation (copied from DccAddrFromCoord)
696         UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
697         UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
698         UINT_32 metaBlkWidthLog2  = Log2(pOut->metaBlkWidth);
699         UINT_32 metaBlkHeightLog2 = Log2(pOut->metaBlkHeight);
700         UINT_32 metaBlkDepthLog2  = Log2(pOut->metaBlkDepth);
701         UINT_32 compBlkWidthLog2  = Log2(pOut->compressBlkWidth);
702         UINT_32 compBlkHeightLog2 = Log2(pOut->compressBlkHeight);
703         UINT_32 compBlkDepthLog2  = Log2(pOut->compressBlkDepth);
704 
705         MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
706                                      Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
707                                      metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
708                                      compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
709 
710         CoordEq *eq = (CoordEq *)((Gfx9Lib *)this)->GetMetaEquation(metaEqParams);
711 
712         // Generate the DCC address equation.
713         pOut->equation.gfx9.num_bits = Min(32u, eq->getsize());
714         bool checked = false;
715         for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) {
716            CoordTerm &bit = (*eq)[b];
717 
718            unsigned c;
719            for (c = 0; c < bit.getsize(); c++) {
720               Coordinate &coord = bit[c];
721               pOut->equation.gfx9.bit[b].coord[c].dim = coord.getdim();
722               pOut->equation.gfx9.bit[b].coord[c].ord = coord.getord();
723            }
724            for (; c < 5; c++)
725               pOut->equation.gfx9.bit[b].coord[c].dim = 5; /* meaning invalid */
726         }
727 
728         // Reduce num_bits because DIM_M fills the rest of the bits monotonically.
729         for (int b = pOut->equation.gfx9.num_bits - 1; b >= 1; b--) {
730            CoordTerm &prev = (*eq)[b - 1];
731            CoordTerm &cur = (*eq)[b];
732 
733            if (cur.getsize() == 1 && cur[0].getdim() == DIM_M &&
734                prev.getsize() == 1 && prev[0].getdim() == DIM_M &&
735                prev[0].getord() + 1 == cur[0].getord())
736               pOut->equation.gfx9.num_bits = b;
737            else
738               break;
739         }
740 
741         pOut->equation.gfx9.numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
742                                                                        pIn->swizzleMode);
743     }
744 
745     return ADDR_OK;
746 }
747 
748 /**
749 ************************************************************************************************************************
750 *   Gfx9Lib::HwlComputeMaxBaseAlignments
751 *
752 *   @brief
753 *       Gets maximum alignments
754 *   @return
755 *       maximum alignments
756 ************************************************************************************************************************
757 */
HwlComputeMaxBaseAlignments() const758 UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const
759 {
760     return Size64K;
761 }
762 
763 /**
764 ************************************************************************************************************************
765 *   Gfx9Lib::HwlComputeMaxMetaBaseAlignments
766 *
767 *   @brief
768 *       Gets maximum alignments for metadata
769 *   @return
770 *       maximum alignments for metadata
771 ************************************************************************************************************************
772 */
HwlComputeMaxMetaBaseAlignments() const773 UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const
774 {
775     // Max base alignment for Htile
776     const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z);
777     const UINT_32 maxNumRbTotal   = m_se * m_rbPerSe;
778 
779     // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2),
780     // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic.
781     ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u));
782     const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u);
783 
784     UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes;
785 
786     if (maxNumPipeTotal > 2)
787     {
788         maxBaseAlignHtile *= (maxNumPipeTotal >> 1);
789     }
790 
791     maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile);
792 
793     if (m_settings.metaBaseAlignFix)
794     {
795         maxBaseAlignHtile = Max(maxBaseAlignHtile, Size64K);
796     }
797 
798     if (m_settings.htileAlignFix)
799     {
800         maxBaseAlignHtile *= maxNumPipeTotal;
801     }
802 
803     // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate
804 
805     // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate
806     UINT_32 maxBaseAlignDcc3D = 65536;
807 
808     if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1))
809     {
810         maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u);
811     }
812 
813     // Max base alignment for Msaa Dcc
814     UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag);
815 
816     if (m_settings.metaBaseAlignFix)
817     {
818         maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, Size64K);
819     }
820 
821     return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D));
822 }
823 
824 /**
825 ************************************************************************************************************************
826 *   Gfx9Lib::HwlComputeCmaskAddrFromCoord
827 *
828 *   @brief
829 *       Interface function stub of AddrComputeCmaskAddrFromCoord
830 *
831 *   @return
832 *       ADDR_E_RETURNCODE
833 ************************************************************************************************************************
834 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)835 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskAddrFromCoord(
836     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
837     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
838 {
839     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0};
840     input.size            = sizeof(input);
841     input.cMaskFlags      = pIn->cMaskFlags;
842     input.colorFlags      = pIn->colorFlags;
843     input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
844     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
845     input.numSlices       = Max(pIn->numSlices, 1u);
846     input.swizzleMode     = pIn->swizzleMode;
847     input.resourceType    = pIn->resourceType;
848 
849     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0};
850     output.size = sizeof(output);
851 
852     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
853 
854     if (returnCode == ADDR_OK)
855     {
856         UINT_32 fmaskBpp              = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
857         UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3);
858         UINT_32 metaBlkWidthLog2      = Log2(output.metaBlkWidth);
859         UINT_32 metaBlkHeightLog2     = Log2(output.metaBlkHeight);
860 
861         MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags,
862                                      Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType,
863                                      metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
864 
865         const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
866 
867         UINT_32 xb = pIn->x / output.metaBlkWidth;
868         UINT_32 yb = pIn->y / output.metaBlkHeight;
869         UINT_32 zb = pIn->slice;
870 
871         UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
872         UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
873         UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
874 
875         UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
876         UINT_64 address  = pMetaEq->solve(coords);
877 
878         pOut->addr = address >> 1;
879         pOut->bitPosition = static_cast<UINT_32>((address & 1) << 2);
880 
881 
882         UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned,
883                                                            pIn->swizzleMode);
884 
885         UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
886 
887         pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
888     }
889 
890     return returnCode;
891 }
892 
893 /**
894 ************************************************************************************************************************
895 *   Gfx9Lib::HwlComputeHtileAddrFromCoord
896 *
897 *   @brief
898 *       Interface function stub of AddrComputeHtileAddrFromCoord
899 *
900 *   @return
901 *       ADDR_E_RETURNCODE
902 ************************************************************************************************************************
903 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)904 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord(
905     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
906     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut)   ///< [out] output structure
907 {
908     ADDR_E_RETURNCODE returnCode = ADDR_OK;
909 
910     if (pIn->numMipLevels > 1)
911     {
912         returnCode = ADDR_NOTIMPLEMENTED;
913     }
914     else
915     {
916         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
917         input.size            = sizeof(input);
918         input.hTileFlags      = pIn->hTileFlags;
919         input.depthFlags      = pIn->depthflags;
920         input.swizzleMode     = pIn->swizzleMode;
921         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
922         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
923         input.numSlices       = Max(pIn->numSlices, 1u);
924         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
925 
926         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
927         output.size = sizeof(output);
928 
929         returnCode = ComputeHtileInfo(&input, &output);
930 
931         if (returnCode == ADDR_OK)
932         {
933             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
934             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
935             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
936             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
937 
938             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
939                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
940                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
941 
942             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
943 
944             UINT_32 xb = pIn->x / output.metaBlkWidth;
945             UINT_32 yb = pIn->y / output.metaBlkHeight;
946             UINT_32 zb = pIn->slice;
947 
948             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
949             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
950             UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
951 
952             UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, 0, blockIndex};
953             UINT_64 address  = pMetaEq->solve(coords);
954 
955             pOut->addr = address >> 1;
956 
957             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
958                                                                pIn->swizzleMode);
959 
960             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
961 
962             pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
963         }
964     }
965 
966     return returnCode;
967 }
968 
969 /**
970 ************************************************************************************************************************
971 *   Gfx9Lib::HwlComputeHtileCoordFromAddr
972 *
973 *   @brief
974 *       Interface function stub of AddrComputeHtileCoordFromAddr
975 *
976 *   @return
977 *       ADDR_E_RETURNCODE
978 ************************************************************************************************************************
979 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)980 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr(
981     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
982     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut)   ///< [out] output structure
983 {
984     ADDR_E_RETURNCODE returnCode = ADDR_OK;
985 
986     if (pIn->numMipLevels > 1)
987     {
988         returnCode = ADDR_NOTIMPLEMENTED;
989     }
990     else
991     {
992         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0};
993         input.size            = sizeof(input);
994         input.hTileFlags      = pIn->hTileFlags;
995         input.swizzleMode     = pIn->swizzleMode;
996         input.unalignedWidth  = Max(pIn->unalignedWidth, 1u);
997         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
998         input.numSlices       = Max(pIn->numSlices, 1u);
999         input.numMipLevels    = Max(pIn->numMipLevels, 1u);
1000 
1001         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0};
1002         output.size = sizeof(output);
1003 
1004         returnCode = ComputeHtileInfo(&input, &output);
1005 
1006         if (returnCode == ADDR_OK)
1007         {
1008             UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1009             UINT_32 metaBlkWidthLog2  = Log2(output.metaBlkWidth);
1010             UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight);
1011             UINT_32 numSamplesLog2    = Log2(pIn->numSamples);
1012 
1013             MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags,
1014                                          Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D,
1015                                          metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0};
1016 
1017             const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1018 
1019             UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned,
1020                                                                pIn->swizzleMode);
1021 
1022             UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1023 
1024             UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1;
1025 
1026             UINT_32 pitchInBlock     = output.pitch / output.metaBlkWidth;
1027             UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock;
1028 
1029             UINT_32 coords[NUM_DIMS];
1030             pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, coords);
1031 
1032             pOut->slice = coords[DIM_M] / sliceSizeInBlock;
1033             pOut->y     = ((coords[DIM_M] % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + coords[DIM_Y];
1034             pOut->x     = (coords[DIM_M] % pitchInBlock) * output.metaBlkWidth + coords[DIM_X];
1035         }
1036     }
1037 
1038     return returnCode;
1039 }
1040 
1041 /**
1042 ************************************************************************************************************************
1043 *   Gfx9Lib::HwlSupportComputeDccAddrFromCoord
1044 *
1045 *   @brief
1046 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
1047 *
1048 *   @return
1049 *       ADDR_E_RETURNCODE
1050 ************************************************************************************************************************
1051 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)1052 ADDR_E_RETURNCODE Gfx9Lib::HwlSupportComputeDccAddrFromCoord(
1053     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
1054 {
1055     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1056 
1057     if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear)
1058     {
1059         returnCode = ADDR_NOTSUPPORTED;
1060     }
1061     else if ((pIn->pitch == 0)             ||
1062              (pIn->height == 0)            ||
1063              (pIn->compressBlkWidth == 0)  ||
1064              (pIn->compressBlkHeight == 0) ||
1065              (pIn->compressBlkDepth == 0)  ||
1066              (pIn->metaBlkWidth == 0)      ||
1067              (pIn->metaBlkHeight == 0)     ||
1068              (pIn->metaBlkDepth == 0)      ||
1069              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
1070     {
1071         returnCode = ADDR_NOTSUPPORTED;
1072     }
1073 
1074     return returnCode;
1075 }
1076 
1077 /**
1078 ************************************************************************************************************************
1079 *   Gfx9Lib::HwlComputeDccAddrFromCoord
1080 *
1081 *   @brief
1082 *       Interface function stub of AddrComputeDccAddrFromCoord
1083 *
1084 *   @return
1085 *       N/A
1086 ************************************************************************************************************************
1087 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)1088 VOID Gfx9Lib::HwlComputeDccAddrFromCoord(
1089     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT*  pIn,
1090     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
1091 {
1092     UINT_32 elementBytesLog2  = Log2(pIn->bpp >> 3);
1093     UINT_32 numSamplesLog2    = Log2(pIn->numFrags);
1094     UINT_32 metaBlkWidthLog2  = Log2(pIn->metaBlkWidth);
1095     UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight);
1096     UINT_32 metaBlkDepthLog2  = Log2(pIn->metaBlkDepth);
1097     UINT_32 compBlkWidthLog2  = Log2(pIn->compressBlkWidth);
1098     UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight);
1099     UINT_32 compBlkDepthLog2  = Log2(pIn->compressBlkDepth);
1100 
1101     MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags,
1102                                  Gfx9DataColor, pIn->swizzleMode, pIn->resourceType,
1103                                  metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2,
1104                                  compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2};
1105 
1106     const CoordEq* pMetaEq = GetMetaEquation(metaEqParams);
1107 
1108     UINT_32 xb = pIn->x / pIn->metaBlkWidth;
1109     UINT_32 yb = pIn->y / pIn->metaBlkHeight;
1110     UINT_32 zb = pIn->slice / pIn->metaBlkDepth;
1111 
1112     UINT_32 pitchInBlock     = pIn->pitch / pIn->metaBlkWidth;
1113     UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock;
1114     UINT_32 blockIndex       = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
1115 
1116     UINT_32 coords[] = {pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex};
1117     UINT_64 address  = pMetaEq->solve(coords);
1118 
1119     pOut->addr = address >> 1;
1120 
1121     UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned,
1122                                                        pIn->swizzleMode);
1123 
1124     UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1));
1125 
1126     pOut->addr ^= (pipeXor << m_pipeInterleaveLog2);
1127 }
1128 
1129 /**
1130 ************************************************************************************************************************
1131 *   Gfx9Lib::HwlInitGlobalParams
1132 *
1133 *   @brief
1134 *       Initializes global parameters
1135 *
1136 *   @return
1137 *       TRUE if all settings are valid
1138 *
1139 ************************************************************************************************************************
1140 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1141 BOOL_32 Gfx9Lib::HwlInitGlobalParams(
1142     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1143 {
1144     BOOL_32 valid = TRUE;
1145 
1146     if (m_settings.isArcticIsland)
1147     {
1148         GB_ADDR_CONFIG_GFX9 gbAddrConfig;
1149 
1150         gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1151 
1152         // These values are copied from CModel code
1153         switch (gbAddrConfig.bits.NUM_PIPES)
1154         {
1155             case ADDR_CONFIG_1_PIPE:
1156                 m_pipes = 1;
1157                 m_pipesLog2 = 0;
1158                 break;
1159             case ADDR_CONFIG_2_PIPE:
1160                 m_pipes = 2;
1161                 m_pipesLog2 = 1;
1162                 break;
1163             case ADDR_CONFIG_4_PIPE:
1164                 m_pipes = 4;
1165                 m_pipesLog2 = 2;
1166                 break;
1167             case ADDR_CONFIG_8_PIPE:
1168                 m_pipes = 8;
1169                 m_pipesLog2 = 3;
1170                 break;
1171             case ADDR_CONFIG_16_PIPE:
1172                 m_pipes = 16;
1173                 m_pipesLog2 = 4;
1174                 break;
1175             case ADDR_CONFIG_32_PIPE:
1176                 m_pipes = 32;
1177                 m_pipesLog2 = 5;
1178                 break;
1179             default:
1180                 ADDR_ASSERT_ALWAYS();
1181                 break;
1182         }
1183 
1184         switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1185         {
1186             case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1187                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
1188                 m_pipeInterleaveLog2 = 8;
1189                 break;
1190             case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1191                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
1192                 m_pipeInterleaveLog2 = 9;
1193                 break;
1194             case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1195                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
1196                 m_pipeInterleaveLog2 = 10;
1197                 break;
1198             case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1199                 m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
1200                 m_pipeInterleaveLog2 = 11;
1201                 break;
1202             default:
1203                 ADDR_ASSERT_ALWAYS();
1204                 break;
1205         }
1206 
1207         // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits,
1208         // and any larger value requires a post-process (left shift) on the output pipeBankXor bits.
1209         ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
1210 
1211         switch (gbAddrConfig.bits.NUM_BANKS)
1212         {
1213             case ADDR_CONFIG_1_BANK:
1214                 m_banks = 1;
1215                 m_banksLog2 = 0;
1216                 break;
1217             case ADDR_CONFIG_2_BANK:
1218                 m_banks = 2;
1219                 m_banksLog2 = 1;
1220                 break;
1221             case ADDR_CONFIG_4_BANK:
1222                 m_banks = 4;
1223                 m_banksLog2 = 2;
1224                 break;
1225             case ADDR_CONFIG_8_BANK:
1226                 m_banks = 8;
1227                 m_banksLog2 = 3;
1228                 break;
1229             case ADDR_CONFIG_16_BANK:
1230                 m_banks = 16;
1231                 m_banksLog2 = 4;
1232                 break;
1233             default:
1234                 ADDR_ASSERT_ALWAYS();
1235                 break;
1236         }
1237 
1238         switch (gbAddrConfig.bits.NUM_SHADER_ENGINES)
1239         {
1240             case ADDR_CONFIG_1_SHADER_ENGINE:
1241                 m_se = 1;
1242                 m_seLog2 = 0;
1243                 break;
1244             case ADDR_CONFIG_2_SHADER_ENGINE:
1245                 m_se = 2;
1246                 m_seLog2 = 1;
1247                 break;
1248             case ADDR_CONFIG_4_SHADER_ENGINE:
1249                 m_se = 4;
1250                 m_seLog2 = 2;
1251                 break;
1252             case ADDR_CONFIG_8_SHADER_ENGINE:
1253                 m_se = 8;
1254                 m_seLog2 = 3;
1255                 break;
1256             default:
1257                 ADDR_ASSERT_ALWAYS();
1258                 break;
1259         }
1260 
1261         switch (gbAddrConfig.bits.NUM_RB_PER_SE)
1262         {
1263             case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE:
1264                 m_rbPerSe = 1;
1265                 m_rbPerSeLog2 = 0;
1266                 break;
1267             case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE:
1268                 m_rbPerSe = 2;
1269                 m_rbPerSeLog2 = 1;
1270                 break;
1271             case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE:
1272                 m_rbPerSe = 4;
1273                 m_rbPerSeLog2 = 2;
1274                 break;
1275             default:
1276                 ADDR_ASSERT_ALWAYS();
1277                 break;
1278         }
1279 
1280         switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
1281         {
1282             case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
1283                 m_maxCompFrag = 1;
1284                 m_maxCompFragLog2 = 0;
1285                 break;
1286             case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
1287                 m_maxCompFrag = 2;
1288                 m_maxCompFragLog2 = 1;
1289                 break;
1290             case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
1291                 m_maxCompFrag = 4;
1292                 m_maxCompFragLog2 = 2;
1293                 break;
1294             case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
1295                 m_maxCompFrag = 8;
1296                 m_maxCompFragLog2 = 3;
1297                 break;
1298             default:
1299                 ADDR_ASSERT_ALWAYS();
1300                 break;
1301         }
1302 
1303         if ((m_rbPerSeLog2 == 1) &&
1304             (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) ||
1305              ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2)))))
1306         {
1307             ADDR_ASSERT(m_settings.isVega10 == FALSE);
1308             ADDR_ASSERT(m_settings.isRaven == FALSE);
1309             ADDR_ASSERT(m_settings.isVega20 == FALSE);
1310             if (m_settings.isVega12)
1311             {
1312                 m_settings.htileCacheRbConflict = 1;
1313             }
1314         }
1315 
1316         // For simplicity we never allow VAR swizzle mode for GFX9, the actural value is 18 on GFX9
1317         m_blockVarSizeLog2 = 0;
1318     }
1319     else
1320     {
1321         valid = FALSE;
1322         ADDR_NOT_IMPLEMENTED();
1323     }
1324 
1325     if (valid)
1326     {
1327         InitEquationTable();
1328     }
1329 
1330     return valid;
1331 }
1332 
1333 /**
1334 ************************************************************************************************************************
1335 *   Gfx9Lib::HwlConvertChipFamily
1336 *
1337 *   @brief
1338 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1339 *   @return
1340 *       ChipFamily
1341 ************************************************************************************************************************
1342 */
HwlConvertChipFamily(UINT_32 uChipFamily,UINT_32 uChipRevision)1343 ChipFamily Gfx9Lib::HwlConvertChipFamily(
1344     UINT_32 uChipFamily,        ///< [in] chip family defined in atiih.h
1345     UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1346 {
1347     ChipFamily family = ADDR_CHIP_FAMILY_AI;
1348 
1349     switch (uChipFamily)
1350     {
1351         case FAMILY_AI:
1352             m_settings.isArcticIsland = 1;
1353             m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision);
1354             m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision);
1355             m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision);
1356             m_settings.isDce12 = 1;
1357 
1358             if (m_settings.isVega10 == 0)
1359             {
1360                 m_settings.htileAlignFix = 1;
1361                 m_settings.applyAliasFix = 1;
1362             }
1363 
1364             m_settings.metaBaseAlignFix = 1;
1365 
1366             m_settings.depthPipeXorDisable = 1;
1367             break;
1368         case FAMILY_RV:
1369             m_settings.isArcticIsland = 1;
1370 
1371             if (ASICREV_IS_RAVEN(uChipRevision))
1372             {
1373                 m_settings.isRaven = 1;
1374 
1375                 m_settings.depthPipeXorDisable = 1;
1376             }
1377 
1378             if (ASICREV_IS_RAVEN2(uChipRevision))
1379             {
1380                 m_settings.isRaven = 1;
1381             }
1382 
1383             if (m_settings.isRaven == 0)
1384             {
1385                 m_settings.htileAlignFix = 1;
1386                 m_settings.applyAliasFix = 1;
1387             }
1388 
1389             m_settings.isDcn1 = m_settings.isRaven;
1390 
1391             if (ASICREV_IS_RENOIR(uChipRevision))
1392             {
1393                 m_settings.isRaven = 1;
1394                 m_settings.isDcn2  = 1;
1395             }
1396 
1397             m_settings.metaBaseAlignFix = 1;
1398             break;
1399 
1400         default:
1401             ADDR_ASSERT(!"No Chip found");
1402             break;
1403     }
1404 
1405     return family;
1406 }
1407 
1408 /**
1409 ************************************************************************************************************************
1410 *   Gfx9Lib::InitRbEquation
1411 *
1412 *   @brief
1413 *       Init RB equation
1414 *   @return
1415 *       N/A
1416 ************************************************************************************************************************
1417 */
GetRbEquation(CoordEq * pRbEq,UINT_32 numRbPerSeLog2,UINT_32 numSeLog2) const1418 VOID Gfx9Lib::GetRbEquation(
1419     CoordEq* pRbEq,             ///< [out] rb equation
1420     UINT_32  numRbPerSeLog2,    ///< [in] number of rb per shader engine
1421     UINT_32  numSeLog2)         ///< [in] number of shader engine
1422     const
1423 {
1424     // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32
1425     UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4;
1426     Coordinate cx(DIM_X, rbRegion);
1427     Coordinate cy(DIM_Y, rbRegion);
1428 
1429     UINT_32 start = 0;
1430     UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2;
1431 
1432     // Clear the rb equation
1433     pRbEq->resize(0);
1434     pRbEq->resize(numRbTotalLog2);
1435 
1436     if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1))
1437     {
1438         // Special case when more than 1 SE, and 2 RB per SE
1439         (*pRbEq)[0].add(cx);
1440         (*pRbEq)[0].add(cy);
1441         cx++;
1442         cy++;
1443 
1444         if (m_settings.applyAliasFix == false)
1445         {
1446             (*pRbEq)[0].add(cy);
1447         }
1448 
1449         (*pRbEq)[0].add(cy);
1450         start++;
1451     }
1452 
1453     UINT_32 numBits = 2 * (numRbTotalLog2 - start);
1454 
1455     for (UINT_32 i = 0; i < numBits; i++)
1456     {
1457         UINT_32 idx =
1458             start + (((start + i) >= numRbTotalLog2) ? (2 * (numRbTotalLog2 - start) - i - 1) : i);
1459 
1460         if ((i % 2) == 1)
1461         {
1462             (*pRbEq)[idx].add(cx);
1463             cx++;
1464         }
1465         else
1466         {
1467             (*pRbEq)[idx].add(cy);
1468             cy++;
1469         }
1470     }
1471 }
1472 
1473 /**
1474 ************************************************************************************************************************
1475 *   Gfx9Lib::GetDataEquation
1476 *
1477 *   @brief
1478 *       Get data equation for fmask and Z
1479 *   @return
1480 *       N/A
1481 ************************************************************************************************************************
1482 */
GetDataEquation(CoordEq * pDataEq,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2) const1483 VOID Gfx9Lib::GetDataEquation(
1484     CoordEq* pDataEq,               ///< [out] data surface equation
1485     Gfx9DataType dataSurfaceType,   ///< [in] data surface type
1486     AddrSwizzleMode swizzleMode,    ///< [in] data surface swizzle mode
1487     AddrResourceType resourceType,  ///< [in] data surface resource type
1488     UINT_32 elementBytesLog2,       ///< [in] data surface element bytes
1489     UINT_32 numSamplesLog2)         ///< [in] data surface sample count
1490     const
1491 {
1492     Coordinate cx(DIM_X, 0);
1493     Coordinate cy(DIM_Y, 0);
1494     Coordinate cz(DIM_Z, 0);
1495     Coordinate cs(DIM_S, 0);
1496 
1497     // Clear the equation
1498     pDataEq->resize(0);
1499     pDataEq->resize(27);
1500 
1501     if (dataSurfaceType == Gfx9DataColor)
1502     {
1503         if (IsLinear(swizzleMode))
1504         {
1505             Coordinate cm(DIM_M, 0);
1506 
1507             pDataEq->resize(49);
1508 
1509             for (UINT_32 i = 0; i < 49; i++)
1510             {
1511                 (*pDataEq)[i].add(cm);
1512                 cm++;
1513             }
1514         }
1515         else if (IsThick(resourceType, swizzleMode))
1516         {
1517             // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d
1518             UINT_32 i;
1519             if (IsStandardSwizzle(resourceType, swizzleMode))
1520             {
1521                 // Standard 3d swizzle
1522                 // Fill in bottom x bits
1523                 for (i = elementBytesLog2; i < 4; i++)
1524                 {
1525                     (*pDataEq)[i].add(cx);
1526                     cx++;
1527                 }
1528                 // Fill in 2 bits of y and then z
1529                 for (i = 4; i < 6; i++)
1530                 {
1531                     (*pDataEq)[i].add(cy);
1532                     cy++;
1533                 }
1534                 for (i = 6; i < 8; i++)
1535                 {
1536                     (*pDataEq)[i].add(cz);
1537                     cz++;
1538                 }
1539                 if (elementBytesLog2 < 2)
1540                 {
1541                     // fill in z & y bit
1542                     (*pDataEq)[8].add(cz);
1543                     (*pDataEq)[9].add(cy);
1544                     cz++;
1545                     cy++;
1546                 }
1547                 else if (elementBytesLog2 == 2)
1548                 {
1549                     // fill in y and x bit
1550                     (*pDataEq)[8].add(cy);
1551                     (*pDataEq)[9].add(cx);
1552                     cy++;
1553                     cx++;
1554                 }
1555                 else
1556                 {
1557                     // fill in 2 x bits
1558                     (*pDataEq)[8].add(cx);
1559                     cx++;
1560                     (*pDataEq)[9].add(cx);
1561                     cx++;
1562                 }
1563             }
1564             else
1565             {
1566                 // Z 3d swizzle
1567                 UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5);
1568                 UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ?
1569                                 2 : ((elementBytesLog2 == 1) ? 3 : 1);
1570                 pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd);
1571                 for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++)
1572                 {
1573                     (*pDataEq)[i].add(cz);
1574                     cz++;
1575                 }
1576                 if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3))
1577                 {
1578                     // add an x and z
1579                     (*pDataEq)[6].add(cx);
1580                     (*pDataEq)[7].add(cz);
1581                     cx++;
1582                     cz++;
1583                 }
1584                 else if (elementBytesLog2 == 2)
1585                 {
1586                     // add a y and z
1587                     (*pDataEq)[6].add(cy);
1588                     (*pDataEq)[7].add(cz);
1589                     cy++;
1590                     cz++;
1591                 }
1592                 // add y and x
1593                 (*pDataEq)[8].add(cy);
1594                 (*pDataEq)[9].add(cx);
1595                 cy++;
1596                 cx++;
1597             }
1598             // Fill in bit 10 and up
1599             pDataEq->mort3d( cz, cy, cx, 10 );
1600         }
1601         else if (IsThin(resourceType, swizzleMode))
1602         {
1603             UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1604             // Color 2D
1605             UINT_32 microYBits = (8 - elementBytesLog2) / 2;
1606             UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2;
1607             UINT_32 i;
1608             // Fill in bottom x bits
1609             for (i = elementBytesLog2; i < 4; i++)
1610             {
1611                 (*pDataEq)[i].add(cx);
1612                 cx++;
1613             }
1614             // Fill in bottom y bits
1615             for (i = 4; i < 4 + microYBits; i++)
1616             {
1617                 (*pDataEq)[i].add(cy);
1618                 cy++;
1619             }
1620             // Fill in last of the micro_x bits
1621             for (i = 4 + microYBits; i < 8; i++)
1622             {
1623                 (*pDataEq)[i].add(cx);
1624                 cx++;
1625             }
1626             // Fill in x/y bits below sample split
1627             pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1);
1628             // Fill in sample bits
1629             for (i = 0; i < numSamplesLog2; i++)
1630             {
1631                 cs.set(DIM_S, i);
1632                 (*pDataEq)[tileSplitStart + i].add(cs);
1633             }
1634             // Fill in x/y bits above sample split
1635             if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1))
1636             {
1637                 pDataEq->mort2d(cx, cy, blockSizeLog2);
1638             }
1639             else
1640             {
1641                 pDataEq->mort2d(cy, cx, blockSizeLog2);
1642             }
1643         }
1644         else
1645         {
1646             ADDR_ASSERT_ALWAYS();
1647         }
1648     }
1649     else
1650     {
1651         // Fmask or depth
1652         UINT_32 sampleStart = elementBytesLog2;
1653         UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2;
1654         UINT_32 ymajStart = 6 + numSamplesLog2;
1655 
1656         for (UINT_32 s = 0; s < numSamplesLog2; s++)
1657         {
1658             cs.set(DIM_S, s);
1659             (*pDataEq)[sampleStart + s].add(cs);
1660         }
1661 
1662         // Put in the x-major order pixel bits
1663         pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1);
1664         // Put in the y-major order pixel bits
1665         pDataEq->mort2d(cy, cx, ymajStart);
1666     }
1667 }
1668 
1669 /**
1670 ************************************************************************************************************************
1671 *   Gfx9Lib::GetPipeEquation
1672 *
1673 *   @brief
1674 *       Get pipe equation
1675 *   @return
1676 *       N/A
1677 ************************************************************************************************************************
1678 */
GetPipeEquation(CoordEq * pPipeEq,CoordEq * pDataEq,UINT_32 pipeInterleaveLog2,UINT_32 numPipeLog2,UINT_32 numSamplesLog2,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType) const1679 VOID Gfx9Lib::GetPipeEquation(
1680     CoordEq*         pPipeEq,            ///< [out] pipe equation
1681     CoordEq*         pDataEq,            ///< [in] data equation
1682     UINT_32          pipeInterleaveLog2, ///< [in] pipe interleave
1683     UINT_32          numPipeLog2,        ///< [in] number of pipes
1684     UINT_32          numSamplesLog2,     ///< [in] data surface sample count
1685     Gfx9DataType     dataSurfaceType,    ///< [in] data surface type
1686     AddrSwizzleMode  swizzleMode,        ///< [in] data surface swizzle mode
1687     AddrResourceType resourceType        ///< [in] data surface resource type
1688     ) const
1689 {
1690     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode);
1691     CoordEq dataEq;
1692 
1693     pDataEq->copy(dataEq);
1694 
1695     if (dataSurfaceType == Gfx9DataColor)
1696     {
1697         INT_32 shift = static_cast<INT_32>(numSamplesLog2);
1698         dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2);
1699     }
1700 
1701     dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2);
1702 
1703     // This section should only apply to z/stencil, maybe fmask
1704     // If the pipe bit is below the comp block size,
1705     // then keep moving up the address until we find a bit that is above
1706     UINT_32 pipeStart = 0;
1707 
1708     if (dataSurfaceType != Gfx9DataColor)
1709     {
1710         Coordinate tileMin(DIM_X, 3);
1711 
1712         while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin)
1713         {
1714             pipeStart++;
1715         }
1716 
1717         // if pipe is 0, then the first pipe bit is above the comp block size,
1718         // so we don't need to do anything
1719         // Note, this if condition is not necessary, since if we execute the loop when pipe==0,
1720         // we will get the same pipe equation
1721         if (pipeStart != 0)
1722         {
1723             for (UINT_32 i = 0; i < numPipeLog2; i++)
1724             {
1725                 // Copy the jth bit above pipe interleave to the current pipe equation bit
1726                 dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]);
1727             }
1728         }
1729     }
1730 
1731     if (IsPrt(swizzleMode))
1732     {
1733         // Clear out bits above the block size if prt's are enabled
1734         dataEq.resize(blockSizeLog2);
1735         dataEq.resize(48);
1736     }
1737 
1738     if (IsXor(swizzleMode))
1739     {
1740         CoordEq xorMask;
1741 
1742         if (IsThick(resourceType, swizzleMode))
1743         {
1744             CoordEq xorMask2;
1745 
1746             dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2);
1747 
1748             xorMask.resize(numPipeLog2);
1749 
1750             for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1751             {
1752                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]);
1753                 xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]);
1754             }
1755         }
1756         else
1757         {
1758             // Xor in the bits above the pipe+gpu bits
1759             dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2);
1760 
1761             if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE))
1762             {
1763                 Coordinate co;
1764                 CoordEq xorMask2;
1765                 // if 1xaa and not prt, then xor in the z bits
1766                 xorMask2.resize(0);
1767                 xorMask2.resize(numPipeLog2);
1768                 for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++)
1769                 {
1770                     co.set(DIM_Z, numPipeLog2 - 1 - pipeIdx);
1771                     xorMask2[pipeIdx].add(co);
1772                 }
1773 
1774                 pPipeEq->xorin(xorMask2);
1775             }
1776         }
1777 
1778         xorMask.reverse();
1779         pPipeEq->xorin(xorMask);
1780     }
1781 }
1782 /**
1783 ************************************************************************************************************************
1784 *   Gfx9Lib::GetMetaEquation
1785 *
1786 *   @brief
1787 *       Get meta equation for cmask/htile/DCC
1788 *   @return
1789 *       Pointer to a calculated meta equation
1790 ************************************************************************************************************************
1791 */
GetMetaEquation(const MetaEqParams & metaEqParams)1792 const CoordEq* Gfx9Lib::GetMetaEquation(
1793     const MetaEqParams& metaEqParams)
1794 {
1795     UINT_32 cachedMetaEqIndex;
1796 
1797     for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++)
1798     {
1799         if (memcmp(&metaEqParams,
1800                    &m_cachedMetaEqKey[cachedMetaEqIndex],
1801                    static_cast<UINT_32>(sizeof(metaEqParams))) == 0)
1802         {
1803             break;
1804         }
1805     }
1806 
1807     CoordEq* pMetaEq = NULL;
1808 
1809     if (cachedMetaEqIndex < MaxCachedMetaEq)
1810     {
1811         pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex];
1812     }
1813     else
1814     {
1815         m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams;
1816 
1817         pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++];
1818 
1819         m_metaEqOverrideIndex %= MaxCachedMetaEq;
1820 
1821         GenMetaEquation(pMetaEq,
1822                         metaEqParams.maxMip,
1823                         metaEqParams.elementBytesLog2,
1824                         metaEqParams.numSamplesLog2,
1825                         metaEqParams.metaFlag,
1826                         metaEqParams.dataSurfaceType,
1827                         metaEqParams.swizzleMode,
1828                         metaEqParams.resourceType,
1829                         metaEqParams.metaBlkWidthLog2,
1830                         metaEqParams.metaBlkHeightLog2,
1831                         metaEqParams.metaBlkDepthLog2,
1832                         metaEqParams.compBlkWidthLog2,
1833                         metaEqParams.compBlkHeightLog2,
1834                         metaEqParams.compBlkDepthLog2);
1835     }
1836 
1837     return pMetaEq;
1838 }
1839 
1840 /**
1841 ************************************************************************************************************************
1842 *   Gfx9Lib::GenMetaEquation
1843 *
1844 *   @brief
1845 *       Get meta equation for cmask/htile/DCC
1846 *   @return
1847 *       N/A
1848 ************************************************************************************************************************
1849 */
GenMetaEquation(CoordEq * pMetaEq,UINT_32 maxMip,UINT_32 elementBytesLog2,UINT_32 numSamplesLog2,ADDR2_META_FLAGS metaFlag,Gfx9DataType dataSurfaceType,AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 metaBlkWidthLog2,UINT_32 metaBlkHeightLog2,UINT_32 metaBlkDepthLog2,UINT_32 compBlkWidthLog2,UINT_32 compBlkHeightLog2,UINT_32 compBlkDepthLog2) const1850 VOID Gfx9Lib::GenMetaEquation(
1851     CoordEq*         pMetaEq,               ///< [out] meta equation
1852     UINT_32          maxMip,                ///< [in] max mip Id
1853     UINT_32          elementBytesLog2,      ///< [in] data surface element bytes
1854     UINT_32          numSamplesLog2,        ///< [in] data surface sample count
1855     ADDR2_META_FLAGS metaFlag,              ///< [in] meta falg
1856     Gfx9DataType     dataSurfaceType,       ///< [in] data surface type
1857     AddrSwizzleMode  swizzleMode,           ///< [in] data surface swizzle mode
1858     AddrResourceType resourceType,          ///< [in] data surface resource type
1859     UINT_32          metaBlkWidthLog2,      ///< [in] meta block width
1860     UINT_32          metaBlkHeightLog2,     ///< [in] meta block height
1861     UINT_32          metaBlkDepthLog2,      ///< [in] meta block depth
1862     UINT_32          compBlkWidthLog2,      ///< [in] compress block width
1863     UINT_32          compBlkHeightLog2,     ///< [in] compress block height
1864     UINT_32          compBlkDepthLog2)      ///< [in] compress block depth
1865     const
1866 {
1867     UINT_32 numPipeTotalLog2   = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode);
1868     UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2;
1869 
1870     // Get the correct data address and rb equation
1871     CoordEq dataEq;
1872     GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType,
1873                     elementBytesLog2, numSamplesLog2);
1874 
1875     // Get pipe and rb equations
1876     CoordEq pipeEquation;
1877     GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2,
1878                     numSamplesLog2, dataSurfaceType, swizzleMode, resourceType);
1879     numPipeTotalLog2 = pipeEquation.getsize();
1880 
1881     if (metaFlag.linear)
1882     {
1883         // Linear metadata supporting was removed for GFX9! No one can use this feature.
1884         ADDR_ASSERT_ALWAYS();
1885 
1886         ADDR_ASSERT(dataSurfaceType == Gfx9DataColor);
1887 
1888         dataEq.copy(*pMetaEq);
1889 
1890         if (IsLinear(swizzleMode))
1891         {
1892             if (metaFlag.pipeAligned)
1893             {
1894                 // Remove the pipe bits
1895                 INT_32 shift = static_cast<INT_32>(numPipeTotalLog2);
1896                 pMetaEq->shift(-shift, pipeInterleaveLog2);
1897             }
1898             // Divide by comp block size, which for linear (which is always color) is 256 B
1899             pMetaEq->shift(-8);
1900 
1901             if (metaFlag.pipeAligned)
1902             {
1903                 // Put pipe bits back in
1904                 pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2);
1905 
1906                 for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
1907                 {
1908                     pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]);
1909                 }
1910             }
1911         }
1912 
1913         pMetaEq->shift(1);
1914     }
1915     else
1916     {
1917         UINT_32 maxCompFragLog2 = static_cast<INT_32>(m_maxCompFragLog2);
1918         UINT_32 compFragLog2 =
1919             ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ?
1920             maxCompFragLog2 : numSamplesLog2;
1921 
1922         UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2;
1923 
1924         // Make sure the metaaddr is cleared
1925         pMetaEq->resize(0);
1926         pMetaEq->resize(27);
1927 
1928         if (IsThick(resourceType, swizzleMode))
1929         {
1930             Coordinate cx(DIM_X, 0);
1931             Coordinate cy(DIM_Y, 0);
1932             Coordinate cz(DIM_Z, 0);
1933 
1934             if (maxMip > 0)
1935             {
1936                 pMetaEq->mort3d(cy, cx, cz);
1937             }
1938             else
1939             {
1940                 pMetaEq->mort3d(cx, cy, cz);
1941             }
1942         }
1943         else
1944         {
1945             Coordinate cx(DIM_X, 0);
1946             Coordinate cy(DIM_Y, 0);
1947             Coordinate cs;
1948 
1949             if (maxMip > 0)
1950             {
1951                 pMetaEq->mort2d(cy, cx, compFragLog2);
1952             }
1953             else
1954             {
1955                 pMetaEq->mort2d(cx, cy, compFragLog2);
1956             }
1957 
1958             //------------------------------------------------------------------------------------------------------------------------
1959             // Put the compressible fragments at the lsb
1960             // the uncompressible frags will be at the msb of the micro address
1961             //------------------------------------------------------------------------------------------------------------------------
1962             for (UINT_32 s = 0; s < compFragLog2; s++)
1963             {
1964                 cs.set(DIM_S, s);
1965                 (*pMetaEq)[s].add(cs);
1966             }
1967         }
1968 
1969         // Keep a copy of the pipe equations
1970         CoordEq origPipeEquation;
1971         pipeEquation.copy(origPipeEquation);
1972 
1973         Coordinate co;
1974         // filter out everything under the compressed block size
1975         co.set(DIM_X, compBlkWidthLog2);
1976         pMetaEq->Filter('<', co, 0, DIM_X);
1977         co.set(DIM_Y, compBlkHeightLog2);
1978         pMetaEq->Filter('<', co, 0, DIM_Y);
1979         co.set(DIM_Z, compBlkDepthLog2);
1980         pMetaEq->Filter('<', co, 0, DIM_Z);
1981 
1982         // For non-color, filter out sample bits
1983         if (dataSurfaceType != Gfx9DataColor)
1984         {
1985             co.set(DIM_X, 0);
1986             pMetaEq->Filter('<', co, 0, DIM_S);
1987         }
1988 
1989         // filter out everything above the metablock size
1990         co.set(DIM_X, metaBlkWidthLog2 - 1);
1991         pMetaEq->Filter('>', co, 0, DIM_X);
1992         co.set(DIM_Y, metaBlkHeightLog2 - 1);
1993         pMetaEq->Filter('>', co, 0, DIM_Y);
1994         co.set(DIM_Z, metaBlkDepthLog2 - 1);
1995         pMetaEq->Filter('>', co, 0, DIM_Z);
1996 
1997         // filter out everything above the metablock size for the channel bits
1998         co.set(DIM_X, metaBlkWidthLog2 - 1);
1999         pipeEquation.Filter('>', co, 0, DIM_X);
2000         co.set(DIM_Y, metaBlkHeightLog2 - 1);
2001         pipeEquation.Filter('>', co, 0, DIM_Y);
2002         co.set(DIM_Z, metaBlkDepthLog2 - 1);
2003         pipeEquation.Filter('>', co, 0, DIM_Z);
2004 
2005         // Make sure we still have the same number of channel bits
2006         if (pipeEquation.getsize() != numPipeTotalLog2)
2007         {
2008             ADDR_ASSERT_ALWAYS();
2009         }
2010 
2011         // Loop through all channel and rb bits,
2012         // and make sure these components exist in the metadata address
2013         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2014         {
2015             for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--)
2016             {
2017                 if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE)
2018                 {
2019                     ADDR_ASSERT_ALWAYS();
2020                 }
2021             }
2022         }
2023 
2024         const UINT_32 numSeLog2     = metaFlag.rbAligned ? m_seLog2      : 0;
2025         const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0;
2026         const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2;
2027         CoordEq       origRbEquation;
2028 
2029         GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2);
2030 
2031         CoordEq rbEquation = origRbEquation;
2032 
2033         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2034         {
2035             for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--)
2036             {
2037                 if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE)
2038                 {
2039                     ADDR_ASSERT_ALWAYS();
2040                 }
2041             }
2042         }
2043 
2044         if (m_settings.applyAliasFix)
2045         {
2046             co.set(DIM_Z, -1);
2047         }
2048 
2049         // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it
2050         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2051         {
2052             for (UINT_32 j = 0; j < numPipeTotalLog2; j++)
2053             {
2054                 BOOL_32 isRbEquationInPipeEquation = FALSE;
2055 
2056                 if (m_settings.applyAliasFix)
2057                 {
2058                     CoordTerm filteredPipeEq;
2059                     filteredPipeEq = pipeEquation[j];
2060 
2061                     filteredPipeEq.Filter('>', co, 0, DIM_Z);
2062 
2063                     isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq);
2064                 }
2065                 else
2066                 {
2067                     isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]);
2068                 }
2069 
2070                 if (isRbEquationInPipeEquation)
2071                 {
2072                     rbEquation[i].Clear();
2073                 }
2074             }
2075         }
2076 
2077          bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {};
2078 
2079         // Loop through each bit of the channel, get the smallest coordinate,
2080         // and remove it from the metaaddr, and rb_equation
2081         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2082         {
2083             pipeEquation[i].getsmallest(co);
2084 
2085             UINT_32 old_size = pMetaEq->getsize();
2086             pMetaEq->Filter('=', co);
2087             UINT_32 new_size = pMetaEq->getsize();
2088             if (new_size != old_size-1)
2089             {
2090                 ADDR_ASSERT_ALWAYS();
2091             }
2092             pipeEquation.remove(co);
2093             for (UINT_32 j = 0; j < numRbTotalLog2; j++)
2094             {
2095                 if (rbEquation[j].remove(co))
2096                 {
2097                     // if we actually removed something from this bit, then add the remaining
2098                     // channel bits, as these can be removed for this bit
2099                     for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++)
2100                     {
2101                         if (pipeEquation[i][k] != co)
2102                         {
2103                             rbEquation[j].add(pipeEquation[i][k]);
2104                             rbAppendedWithPipeBits[j] = true;
2105                         }
2106                     }
2107                 }
2108             }
2109         }
2110 
2111         // Loop through the rb bits and see what remain;
2112         // filter out the smallest coordinate if it remains
2113         UINT_32 rbBitsLeft = 0;
2114         for (UINT_32 i = 0; i < numRbTotalLog2; i++)
2115         {
2116             BOOL_32 isRbEqAppended = FALSE;
2117 
2118             if (m_settings.applyAliasFix)
2119             {
2120                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2121             }
2122             else
2123             {
2124                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2125             }
2126 
2127             if (isRbEqAppended)
2128             {
2129                 rbBitsLeft++;
2130                 rbEquation[i].getsmallest(co);
2131                 UINT_32 old_size = pMetaEq->getsize();
2132                 pMetaEq->Filter('=', co);
2133                 UINT_32 new_size = pMetaEq->getsize();
2134                 if (new_size != old_size - 1)
2135                 {
2136                     // assert warning
2137                 }
2138                 for (UINT_32 j = i + 1; j < numRbTotalLog2; j++)
2139                 {
2140                     if (rbEquation[j].remove(co))
2141                     {
2142                         // if we actually removed something from this bit, then add the remaining
2143                         // rb bits, as these can be removed for this bit
2144                         for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++)
2145                         {
2146                             if (rbEquation[i][k] != co)
2147                             {
2148                                 rbEquation[j].add(rbEquation[i][k]);
2149                                 rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i];
2150                             }
2151                         }
2152                     }
2153                 }
2154             }
2155         }
2156 
2157         // capture the size of the metaaddr
2158         UINT_32 metaSize = pMetaEq->getsize();
2159         // resize to 49 bits...make this a nibble address
2160         pMetaEq->resize(49);
2161         // Concatenate the macro address above the current address
2162         for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++)
2163         {
2164             co.set(DIM_M, j);
2165             (*pMetaEq)[i].add(co);
2166         }
2167 
2168         // Multiply by meta element size (in nibbles)
2169         if (dataSurfaceType == Gfx9DataColor)
2170         {
2171             pMetaEq->shift(1);
2172         }
2173         else if (dataSurfaceType == Gfx9DataDepthStencil)
2174         {
2175             pMetaEq->shift(3);
2176         }
2177 
2178         //------------------------------------------------------------------------------------------
2179         // Note the pipeInterleaveLog2+1 is because address is a nibble address
2180         // Shift up from pipe interleave number of channel
2181         // and rb bits left, and uncompressed fragments
2182         //------------------------------------------------------------------------------------------
2183 
2184         pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1);
2185 
2186         // Put in the channel bits
2187         for (UINT_32 i = 0; i < numPipeTotalLog2; i++)
2188         {
2189             origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]);
2190         }
2191 
2192         // Put in remaining rb bits
2193         for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % numRbTotalLog2)
2194         {
2195             BOOL_32 isRbEqAppended = FALSE;
2196 
2197             if (m_settings.applyAliasFix)
2198             {
2199                 isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0));
2200             }
2201             else
2202             {
2203                 isRbEqAppended = (rbEquation[i].getsize() > 0);
2204             }
2205 
2206             if (isRbEqAppended)
2207             {
2208                 origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]);
2209                 // Mark any rb bit we add in to the rb mask
2210                 j++;
2211             }
2212         }
2213 
2214         //------------------------------------------------------------------------------------------
2215         // Put in the uncompressed fragment bits
2216         //------------------------------------------------------------------------------------------
2217         for (UINT_32 i = 0; i < uncompFragLog2; i++)
2218         {
2219             co.set(DIM_S, compFragLog2 + i);
2220             (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co);
2221         }
2222     }
2223 }
2224 
2225 /**
2226 ************************************************************************************************************************
2227 *   Gfx9Lib::IsEquationSupported
2228 *
2229 *   @brief
2230 *       Check if equation is supported for given swizzle mode and resource type.
2231 *
2232 *   @return
2233 *       TRUE if supported
2234 ************************************************************************************************************************
2235 */
IsEquationSupported(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2) const2236 BOOL_32 Gfx9Lib::IsEquationSupported(
2237     AddrResourceType rsrcType,
2238     AddrSwizzleMode  swMode,
2239     UINT_32          elementBytesLog2) const
2240 {
2241     BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) &&
2242                         (IsValidSwMode(swMode) == TRUE) &&
2243                         (IsLinear(swMode) == FALSE) &&
2244                         (((IsTex2d(rsrcType) == TRUE) &&
2245                           ((elementBytesLog2 < 4) ||
2246                            ((IsRotateSwizzle(swMode) == FALSE) &&
2247                             (IsZOrderSwizzle(swMode) == FALSE)))) ||
2248                          ((IsTex3d(rsrcType) == TRUE) &&
2249                           (IsRotateSwizzle(swMode) == FALSE) &&
2250                           (IsBlock256b(swMode) == FALSE)));
2251 
2252     return supported;
2253 }
2254 
2255 /**
2256 ************************************************************************************************************************
2257 *   Gfx9Lib::InitEquationTable
2258 *
2259 *   @brief
2260 *       Initialize Equation table.
2261 *
2262 *   @return
2263 *       N/A
2264 ************************************************************************************************************************
2265 */
InitEquationTable()2266 VOID Gfx9Lib::InitEquationTable()
2267 {
2268     memset(m_equationTable, 0, sizeof(m_equationTable));
2269 
2270     // Loop all possible resource type (2D/3D)
2271     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
2272     {
2273         AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
2274 
2275         // Loop all possible swizzle mode
2276         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
2277         {
2278             AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
2279 
2280             // Loop all possible bpp
2281             for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++)
2282             {
2283                 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
2284 
2285                 // Check if the input is supported
2286                 if (IsEquationSupported(rsrcType, swMode, bppIdx))
2287                 {
2288                     ADDR_EQUATION     equation;
2289                     ADDR_E_RETURNCODE retCode;
2290 
2291                     memset(&equation, 0, sizeof(ADDR_EQUATION));
2292 
2293                     // Generate the equation
2294                     if (IsBlock256b(swMode) && IsTex2d(rsrcType))
2295                     {
2296                         retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation);
2297                     }
2298                     else if (IsThin(rsrcType, swMode))
2299                     {
2300                         retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation);
2301                     }
2302                     else
2303                     {
2304                         retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation);
2305                     }
2306 
2307                     // Only fill the equation into the table if the return code is ADDR_OK,
2308                     // otherwise if the return code is not ADDR_OK, it indicates this is not
2309                     // a valid input, we do nothing but just fill invalid equation index
2310                     // into the lookup table.
2311                     if (retCode == ADDR_OK)
2312                     {
2313                         equationIndex = m_numEquations;
2314                         ADDR_ASSERT(equationIndex < EquationTableSize);
2315 
2316                         m_equationTable[equationIndex] = equation;
2317 
2318                         m_numEquations++;
2319                     }
2320                     else
2321                     {
2322                         ADDR_ASSERT_ALWAYS();
2323                     }
2324                 }
2325 
2326                 // Fill the index into the lookup table, if the combination is not supported
2327                 // fill the invalid equation index
2328                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex;
2329             }
2330         }
2331     }
2332 }
2333 
2334 /**
2335 ************************************************************************************************************************
2336 *   Gfx9Lib::HwlGetEquationIndex
2337 *
2338 *   @brief
2339 *       Interface function stub of GetEquationIndex
2340 *
2341 *   @return
2342 *       ADDR_E_RETURNCODE
2343 ************************************************************************************************************************
2344 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2345 UINT_32 Gfx9Lib::HwlGetEquationIndex(
2346     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
2347     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut
2348     ) const
2349 {
2350     AddrResourceType rsrcType         = pIn->resourceType;
2351     AddrSwizzleMode  swMode           = pIn->swizzleMode;
2352     UINT_32          elementBytesLog2 = Log2(pIn->bpp >> 3);
2353     UINT_32          index            = ADDR_INVALID_EQUATION_INDEX;
2354 
2355     if (IsEquationSupported(rsrcType, swMode, elementBytesLog2))
2356     {
2357         UINT_32 rsrcTypeIdx = static_cast<UINT_32>(rsrcType) - 1;
2358         UINT_32 swModeIdx   = static_cast<UINT_32>(swMode);
2359 
2360         index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2];
2361     }
2362 
2363     if (pOut->pMipInfo != NULL)
2364     {
2365         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2366         {
2367             pOut->pMipInfo[i].equationIndex = index;
2368         }
2369     }
2370 
2371     return index;
2372 }
2373 
2374 /**
2375 ************************************************************************************************************************
2376 *   Gfx9Lib::HwlComputeBlock256Equation
2377 *
2378 *   @brief
2379 *       Interface function stub of ComputeBlock256Equation
2380 *
2381 *   @return
2382 *       ADDR_E_RETURNCODE
2383 ************************************************************************************************************************
2384 */
HwlComputeBlock256Equation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2385 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
2386     AddrResourceType rsrcType,
2387     AddrSwizzleMode  swMode,
2388     UINT_32          elementBytesLog2,
2389     ADDR_EQUATION*   pEquation) const
2390 {
2391     ADDR_E_RETURNCODE ret = ADDR_OK;
2392 
2393     pEquation->numBits = 8;
2394     pEquation->numBitComponents = 1;
2395 
2396     UINT_32 i = 0;
2397     for (; i < elementBytesLog2; i++)
2398     {
2399         InitChannel(1, 0 , i, &pEquation->addr[i]);
2400     }
2401 
2402     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2403 
2404     const UINT_32 maxBitsUsed = 4;
2405     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2406     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2407 
2408     for (i = 0; i < maxBitsUsed; i++)
2409     {
2410         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2411         InitChannel(1, 1, i, &y[i]);
2412     }
2413 
2414     if (IsStandardSwizzle(rsrcType, swMode))
2415     {
2416         switch (elementBytesLog2)
2417         {
2418             case 0:
2419                 pixelBit[0] = x[0];
2420                 pixelBit[1] = x[1];
2421                 pixelBit[2] = x[2];
2422                 pixelBit[3] = x[3];
2423                 pixelBit[4] = y[0];
2424                 pixelBit[5] = y[1];
2425                 pixelBit[6] = y[2];
2426                 pixelBit[7] = y[3];
2427                 break;
2428             case 1:
2429                 pixelBit[0] = x[0];
2430                 pixelBit[1] = x[1];
2431                 pixelBit[2] = x[2];
2432                 pixelBit[3] = y[0];
2433                 pixelBit[4] = y[1];
2434                 pixelBit[5] = y[2];
2435                 pixelBit[6] = x[3];
2436                 break;
2437             case 2:
2438                 pixelBit[0] = x[0];
2439                 pixelBit[1] = x[1];
2440                 pixelBit[2] = y[0];
2441                 pixelBit[3] = y[1];
2442                 pixelBit[4] = y[2];
2443                 pixelBit[5] = x[2];
2444                 break;
2445             case 3:
2446                 pixelBit[0] = x[0];
2447                 pixelBit[1] = y[0];
2448                 pixelBit[2] = y[1];
2449                 pixelBit[3] = x[1];
2450                 pixelBit[4] = x[2];
2451                 break;
2452             case 4:
2453                 pixelBit[0] = y[0];
2454                 pixelBit[1] = y[1];
2455                 pixelBit[2] = x[0];
2456                 pixelBit[3] = x[1];
2457                 break;
2458             default:
2459                 ADDR_ASSERT_ALWAYS();
2460                 ret = ADDR_INVALIDPARAMS;
2461                 break;
2462         }
2463     }
2464     else if (IsDisplaySwizzle(rsrcType, swMode))
2465     {
2466         switch (elementBytesLog2)
2467         {
2468             case 0:
2469                 pixelBit[0] = x[0];
2470                 pixelBit[1] = x[1];
2471                 pixelBit[2] = x[2];
2472                 pixelBit[3] = y[1];
2473                 pixelBit[4] = y[0];
2474                 pixelBit[5] = y[2];
2475                 pixelBit[6] = x[3];
2476                 pixelBit[7] = y[3];
2477                 break;
2478             case 1:
2479                 pixelBit[0] = x[0];
2480                 pixelBit[1] = x[1];
2481                 pixelBit[2] = x[2];
2482                 pixelBit[3] = y[0];
2483                 pixelBit[4] = y[1];
2484                 pixelBit[5] = y[2];
2485                 pixelBit[6] = x[3];
2486                 break;
2487             case 2:
2488                 pixelBit[0] = x[0];
2489                 pixelBit[1] = x[1];
2490                 pixelBit[2] = y[0];
2491                 pixelBit[3] = x[2];
2492                 pixelBit[4] = y[1];
2493                 pixelBit[5] = y[2];
2494                 break;
2495             case 3:
2496                 pixelBit[0] = x[0];
2497                 pixelBit[1] = y[0];
2498                 pixelBit[2] = x[1];
2499                 pixelBit[3] = x[2];
2500                 pixelBit[4] = y[1];
2501                 break;
2502             case 4:
2503                 pixelBit[0] = x[0];
2504                 pixelBit[1] = y[0];
2505                 pixelBit[2] = x[1];
2506                 pixelBit[3] = y[1];
2507                 break;
2508             default:
2509                 ADDR_ASSERT_ALWAYS();
2510                 ret = ADDR_INVALIDPARAMS;
2511                 break;
2512         }
2513     }
2514     else if (IsRotateSwizzle(swMode))
2515     {
2516         switch (elementBytesLog2)
2517         {
2518             case 0:
2519                 pixelBit[0] = y[0];
2520                 pixelBit[1] = y[1];
2521                 pixelBit[2] = y[2];
2522                 pixelBit[3] = x[1];
2523                 pixelBit[4] = x[0];
2524                 pixelBit[5] = x[2];
2525                 pixelBit[6] = x[3];
2526                 pixelBit[7] = y[3];
2527                 break;
2528             case 1:
2529                 pixelBit[0] = y[0];
2530                 pixelBit[1] = y[1];
2531                 pixelBit[2] = y[2];
2532                 pixelBit[3] = x[0];
2533                 pixelBit[4] = x[1];
2534                 pixelBit[5] = x[2];
2535                 pixelBit[6] = x[3];
2536                 break;
2537             case 2:
2538                 pixelBit[0] = y[0];
2539                 pixelBit[1] = y[1];
2540                 pixelBit[2] = x[0];
2541                 pixelBit[3] = y[2];
2542                 pixelBit[4] = x[1];
2543                 pixelBit[5] = x[2];
2544                 break;
2545             case 3:
2546                 pixelBit[0] = y[0];
2547                 pixelBit[1] = x[0];
2548                 pixelBit[2] = y[1];
2549                 pixelBit[3] = x[1];
2550                 pixelBit[4] = x[2];
2551                 break;
2552             default:
2553                 ADDR_ASSERT_ALWAYS();
2554             case 4:
2555                 ret = ADDR_INVALIDPARAMS;
2556                 break;
2557         }
2558     }
2559     else
2560     {
2561         ADDR_ASSERT_ALWAYS();
2562         ret = ADDR_INVALIDPARAMS;
2563     }
2564 
2565     // Post validation
2566     if (ret == ADDR_OK)
2567     {
2568         Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2569         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
2570                     (microBlockDim.w * (1 << elementBytesLog2)));
2571         ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
2572     }
2573 
2574     return ret;
2575 }
2576 
2577 /**
2578 ************************************************************************************************************************
2579 *   Gfx9Lib::HwlComputeThinEquation
2580 *
2581 *   @brief
2582 *       Interface function stub of ComputeThinEquation
2583 *
2584 *   @return
2585 *       ADDR_E_RETURNCODE
2586 ************************************************************************************************************************
2587 */
HwlComputeThinEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2588 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation(
2589     AddrResourceType rsrcType,
2590     AddrSwizzleMode  swMode,
2591     UINT_32          elementBytesLog2,
2592     ADDR_EQUATION*   pEquation) const
2593 {
2594     ADDR_E_RETURNCODE ret = ADDR_OK;
2595 
2596     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2597 
2598     UINT_32 maxXorBits = blockSizeLog2;
2599     if (IsNonPrtXor(swMode))
2600     {
2601         // For non-prt-xor, maybe need to initialize some more bits for xor
2602         // The highest xor bit used in equation will be max the following 3 items:
2603         // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits
2604         // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits
2605         // 3. blockSizeLog2
2606 
2607         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2));
2608         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2609                                      GetPipeXorBits(blockSizeLog2) +
2610                                      2 * GetBankXorBits(blockSizeLog2));
2611     }
2612 
2613     const UINT_32 maxBitsUsed = 14;
2614     ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits);
2615     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2616     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2617 
2618     const UINT_32 extraXorBits = 16;
2619     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2620     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2621 
2622     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2623     {
2624         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2625         InitChannel(1, 1, i, &y[i]);
2626     }
2627 
2628     ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr;
2629 
2630     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2631     {
2632         InitChannel(1, 0 , i, &pixelBit[i]);
2633     }
2634 
2635     UINT_32 xIdx = 0;
2636     UINT_32 yIdx = 0;
2637     UINT_32 lowBits = 0;
2638 
2639     if (IsZOrderSwizzle(swMode))
2640     {
2641         if (elementBytesLog2 <= 3)
2642         {
2643             for (UINT_32 i = elementBytesLog2; i < 6; i++)
2644             {
2645                 pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? x[xIdx++] : y[yIdx++];
2646             }
2647 
2648             lowBits = 6;
2649         }
2650         else
2651         {
2652             ret = ADDR_INVALIDPARAMS;
2653         }
2654     }
2655     else
2656     {
2657         ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation);
2658 
2659         if (ret == ADDR_OK)
2660         {
2661             Dim2d microBlockDim = Block256_2d[elementBytesLog2];
2662             xIdx = Log2(microBlockDim.w);
2663             yIdx = Log2(microBlockDim.h);
2664             lowBits = 8;
2665         }
2666     }
2667 
2668     if (ret == ADDR_OK)
2669     {
2670         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2671         {
2672             pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2673         }
2674 
2675         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2676         {
2677             xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++];
2678         }
2679 
2680         if (IsXor(swMode))
2681         {
2682             // Fill XOR bits
2683             UINT_32 pipeStart = m_pipeInterleaveLog2;
2684             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2685 
2686             UINT_32 bankStart = pipeStart + pipeXorBits;
2687             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2688 
2689             for (UINT_32 i = 0; i < pipeXorBits; i++)
2690             {
2691                 UINT_32               xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i;
2692                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2693                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2694 
2695                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2696             }
2697 
2698             for (UINT_32 i = 0; i < bankXorBits; i++)
2699             {
2700                 UINT_32               xor1BitPos = bankStart + 2 * bankXorBits - 1 - i;
2701                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2702                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2703 
2704                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2705             }
2706 
2707             if (IsPrt(swMode) == FALSE)
2708             {
2709                 for (UINT_32 i = 0; i < pipeXorBits; i++)
2710                 {
2711                     InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]);
2712                 }
2713 
2714                 for (UINT_32 i = 0; i < bankXorBits; i++)
2715                 {
2716                     InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]);
2717                 }
2718             }
2719         }
2720 
2721         FillEqBitComponents(pEquation);
2722         pEquation->numBits = blockSizeLog2;
2723     }
2724 
2725     return ret;
2726 }
2727 
2728 /**
2729 ************************************************************************************************************************
2730 *   Gfx9Lib::HwlComputeThickEquation
2731 *
2732 *   @brief
2733 *       Interface function stub of ComputeThickEquation
2734 *
2735 *   @return
2736 *       ADDR_E_RETURNCODE
2737 ************************************************************************************************************************
2738 */
HwlComputeThickEquation(AddrResourceType rsrcType,AddrSwizzleMode swMode,UINT_32 elementBytesLog2,ADDR_EQUATION * pEquation) const2739 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation(
2740     AddrResourceType rsrcType,
2741     AddrSwizzleMode  swMode,
2742     UINT_32          elementBytesLog2,
2743     ADDR_EQUATION*   pEquation) const
2744 {
2745     ADDR_E_RETURNCODE ret = ADDR_OK;
2746 
2747     ADDR_ASSERT(IsTex3d(rsrcType));
2748 
2749     UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
2750 
2751     UINT_32 maxXorBits = blockSizeLog2;
2752     if (IsNonPrtXor(swMode))
2753     {
2754         // For non-prt-xor, maybe need to initialize some more bits for xor
2755         // The highest xor bit used in equation will be max the following 3:
2756         // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits
2757         // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits
2758         // 3. blockSizeLog2
2759 
2760         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2));
2761         maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 +
2762                                      GetPipeXorBits(blockSizeLog2) +
2763                                      3 * GetBankXorBits(blockSizeLog2));
2764     }
2765 
2766     for (UINT_32 i = 0; i < elementBytesLog2; i++)
2767     {
2768         InitChannel(1, 0 , i, &pEquation->addr[i]);
2769     }
2770 
2771     ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2];
2772 
2773     const UINT_32 maxBitsUsed = 12;
2774     ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits);
2775     ADDR_CHANNEL_SETTING x[maxBitsUsed] = {};
2776     ADDR_CHANNEL_SETTING y[maxBitsUsed] = {};
2777     ADDR_CHANNEL_SETTING z[maxBitsUsed] = {};
2778 
2779     const UINT_32 extraXorBits = 24;
2780     ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2);
2781     ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {};
2782 
2783     for (UINT_32 i = 0; i < maxBitsUsed; i++)
2784     {
2785         InitChannel(1, 0, elementBytesLog2 + i, &x[i]);
2786         InitChannel(1, 1, i, &y[i]);
2787         InitChannel(1, 2, i, &z[i]);
2788     }
2789 
2790     if (IsZOrderSwizzle(swMode))
2791     {
2792         switch (elementBytesLog2)
2793         {
2794             case 0:
2795                 pixelBit[0]  = x[0];
2796                 pixelBit[1]  = y[0];
2797                 pixelBit[2]  = x[1];
2798                 pixelBit[3]  = y[1];
2799                 pixelBit[4]  = z[0];
2800                 pixelBit[5]  = z[1];
2801                 pixelBit[6]  = x[2];
2802                 pixelBit[7]  = z[2];
2803                 pixelBit[8]  = y[2];
2804                 pixelBit[9]  = x[3];
2805                 break;
2806             case 1:
2807                 pixelBit[0]  = x[0];
2808                 pixelBit[1]  = y[0];
2809                 pixelBit[2]  = x[1];
2810                 pixelBit[3]  = y[1];
2811                 pixelBit[4]  = z[0];
2812                 pixelBit[5]  = z[1];
2813                 pixelBit[6]  = z[2];
2814                 pixelBit[7]  = y[2];
2815                 pixelBit[8]  = x[2];
2816                 break;
2817             case 2:
2818                 pixelBit[0]  = x[0];
2819                 pixelBit[1]  = y[0];
2820                 pixelBit[2]  = x[1];
2821                 pixelBit[3]  = z[0];
2822                 pixelBit[4]  = y[1];
2823                 pixelBit[5]  = z[1];
2824                 pixelBit[6]  = y[2];
2825                 pixelBit[7]  = x[2];
2826                 break;
2827             case 3:
2828                 pixelBit[0]  = x[0];
2829                 pixelBit[1]  = y[0];
2830                 pixelBit[2]  = z[0];
2831                 pixelBit[3]  = x[1];
2832                 pixelBit[4]  = z[1];
2833                 pixelBit[5]  = y[1];
2834                 pixelBit[6]  = x[2];
2835                 break;
2836             case 4:
2837                 pixelBit[0]  = x[0];
2838                 pixelBit[1]  = y[0];
2839                 pixelBit[2]  = z[0];
2840                 pixelBit[3]  = z[1];
2841                 pixelBit[4]  = y[1];
2842                 pixelBit[5]  = x[1];
2843                 break;
2844             default:
2845                 ADDR_ASSERT_ALWAYS();
2846                 ret = ADDR_INVALIDPARAMS;
2847                 break;
2848         }
2849     }
2850     else if (IsStandardSwizzle(rsrcType, swMode))
2851     {
2852         switch (elementBytesLog2)
2853         {
2854             case 0:
2855                 pixelBit[0]  = x[0];
2856                 pixelBit[1]  = x[1];
2857                 pixelBit[2]  = x[2];
2858                 pixelBit[3]  = x[3];
2859                 pixelBit[4]  = y[0];
2860                 pixelBit[5]  = y[1];
2861                 pixelBit[6]  = z[0];
2862                 pixelBit[7]  = z[1];
2863                 pixelBit[8]  = z[2];
2864                 pixelBit[9]  = y[2];
2865                 break;
2866             case 1:
2867                 pixelBit[0]  = x[0];
2868                 pixelBit[1]  = x[1];
2869                 pixelBit[2]  = x[2];
2870                 pixelBit[3]  = y[0];
2871                 pixelBit[4]  = y[1];
2872                 pixelBit[5]  = z[0];
2873                 pixelBit[6]  = z[1];
2874                 pixelBit[7]  = z[2];
2875                 pixelBit[8]  = y[2];
2876                 break;
2877             case 2:
2878                 pixelBit[0]  = x[0];
2879                 pixelBit[1]  = x[1];
2880                 pixelBit[2]  = y[0];
2881                 pixelBit[3]  = y[1];
2882                 pixelBit[4]  = z[0];
2883                 pixelBit[5]  = z[1];
2884                 pixelBit[6]  = y[2];
2885                 pixelBit[7]  = x[2];
2886                 break;
2887             case 3:
2888                 pixelBit[0]  = x[0];
2889                 pixelBit[1]  = y[0];
2890                 pixelBit[2]  = y[1];
2891                 pixelBit[3]  = z[0];
2892                 pixelBit[4]  = z[1];
2893                 pixelBit[5]  = x[1];
2894                 pixelBit[6]  = x[2];
2895                 break;
2896             case 4:
2897                 pixelBit[0]  = y[0];
2898                 pixelBit[1]  = y[1];
2899                 pixelBit[2]  = z[0];
2900                 pixelBit[3]  = z[1];
2901                 pixelBit[4]  = x[0];
2902                 pixelBit[5]  = x[1];
2903                 break;
2904             default:
2905                 ADDR_ASSERT_ALWAYS();
2906                 ret = ADDR_INVALIDPARAMS;
2907                 break;
2908         }
2909     }
2910     else
2911     {
2912         ADDR_ASSERT_ALWAYS();
2913         ret = ADDR_INVALIDPARAMS;
2914     }
2915 
2916     if (ret == ADDR_OK)
2917     {
2918         Dim3d microBlockDim = Block1K_3d[elementBytesLog2];
2919         UINT_32 xIdx = Log2(microBlockDim.w);
2920         UINT_32 yIdx = Log2(microBlockDim.h);
2921         UINT_32 zIdx = Log2(microBlockDim.d);
2922 
2923         pixelBit = pEquation->addr;
2924 
2925         const UINT_32 lowBits = 10;
2926         ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1);
2927         ADDR_ASSERT(pEquation->addr[lowBits].valid == 0);
2928 
2929         for (UINT_32 i = lowBits; i < blockSizeLog2; i++)
2930         {
2931             if ((i % 3) == 0)
2932             {
2933                 pixelBit[i] = x[xIdx++];
2934             }
2935             else if ((i % 3) == 1)
2936             {
2937                 pixelBit[i] = z[zIdx++];
2938             }
2939             else
2940             {
2941                 pixelBit[i] = y[yIdx++];
2942             }
2943         }
2944 
2945         for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++)
2946         {
2947             if ((i % 3) == 0)
2948             {
2949                 xorExtra[i - blockSizeLog2] = x[xIdx++];
2950             }
2951             else if ((i % 3) == 1)
2952             {
2953                 xorExtra[i - blockSizeLog2] = z[zIdx++];
2954             }
2955             else
2956             {
2957                 xorExtra[i - blockSizeLog2] = y[yIdx++];
2958             }
2959         }
2960 
2961         if (IsXor(swMode))
2962         {
2963             // Fill XOR bits
2964             UINT_32 pipeStart = m_pipeInterleaveLog2;
2965             UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2);
2966             for (UINT_32 i = 0; i < pipeXorBits; i++)
2967             {
2968                 UINT_32               xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i);
2969                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2970                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2971 
2972                 InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src);
2973 
2974                 UINT_32               xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i);
2975                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2976                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2977 
2978                 InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src);
2979             }
2980 
2981             UINT_32 bankStart = pipeStart + pipeXorBits;
2982             UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2);
2983             for (UINT_32 i = 0; i < bankXorBits; i++)
2984             {
2985                 UINT_32               xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i);
2986                 ADDR_CHANNEL_SETTING* pXor1Src   = (xor1BitPos < blockSizeLog2) ?
2987                                                    &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2];
2988 
2989                 InitChannel(&pEquation->xor1[bankStart + i], pXor1Src);
2990 
2991                 UINT_32               xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i);
2992                 ADDR_CHANNEL_SETTING* pXor2Src   = (xor2BitPos < blockSizeLog2) ?
2993                                                    &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2];
2994 
2995                 InitChannel(&pEquation->xor2[bankStart + i], pXor2Src);
2996             }
2997         }
2998 
2999         FillEqBitComponents(pEquation);
3000         pEquation->numBits = blockSizeLog2;
3001     }
3002 
3003     return ret;
3004 }
3005 
3006 /**
3007 ************************************************************************************************************************
3008 *   Gfx9Lib::IsValidDisplaySwizzleMode
3009 *
3010 *   @brief
3011 *       Check if a swizzle mode is supported by display engine
3012 *
3013 *   @return
3014 *       TRUE is swizzle mode is supported by display engine
3015 ************************************************************************************************************************
3016 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3017 BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
3018     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3019 {
3020     BOOL_32 support = FALSE;
3021 
3022     const UINT_32 swizzleMask = 1 << pIn->swizzleMode;
3023 
3024     if (m_settings.isDce12)
3025     {
3026         if (pIn->bpp == 32)
3027         {
3028             support = (Dce12Bpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3029         }
3030         else if (pIn->bpp <= 64)
3031         {
3032             support = (Dce12NonBpp32SwModeMask & swizzleMask) ? TRUE : FALSE;
3033         }
3034     }
3035     else if (m_settings.isDcn1)
3036     {
3037         if (pIn->bpp < 64)
3038         {
3039             support = (Dcn1NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3040         }
3041         else if (pIn->bpp == 64)
3042         {
3043             support = (Dcn1Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3044         }
3045     }
3046     else if (m_settings.isDcn2)
3047     {
3048         if (pIn->bpp < 64)
3049         {
3050             support = (Dcn2NonBpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3051         }
3052         else if (pIn->bpp == 64)
3053         {
3054             support = (Dcn2Bpp64SwModeMask & swizzleMask) ? TRUE : FALSE;
3055         }
3056     }
3057     else
3058     {
3059         ADDR_NOT_IMPLEMENTED();
3060     }
3061 
3062     return support;
3063 }
3064 
3065 /**
3066 ************************************************************************************************************************
3067 *   Gfx9Lib::HwlComputePipeBankXor
3068 *
3069 *   @brief
3070 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
3071 *
3072 *   @return
3073 *       PipeBankXor value
3074 ************************************************************************************************************************
3075 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const3076 ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor(
3077     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
3078     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut) const
3079 {
3080     if (IsXor(pIn->swizzleMode))
3081     {
3082         UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3083         UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3084         UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3085 
3086         UINT_32 pipeXor = 0;
3087         UINT_32 bankXor = 0;
3088 
3089         const UINT_32 bankMask = (1 << bankBits) - 1;
3090         const UINT_32 index    = pIn->surfIndex & bankMask;
3091 
3092         const UINT_32 bpp      = pIn->flags.fmask ?
3093                                  GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format);
3094         if (bankBits == 4)
3095         {
3096             static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10};
3097             static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10};
3098 
3099             bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index];
3100         }
3101         else if (bankBits > 0)
3102         {
3103             UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1;
3104             bankIncrease = (bankIncrease == 0) ? 1 : bankIncrease;
3105             bankXor = (index * bankIncrease) & bankMask;
3106         }
3107 
3108         pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor;
3109     }
3110     else
3111     {
3112         pOut->pipeBankXor = 0;
3113     }
3114 
3115     return ADDR_OK;
3116 }
3117 
3118 /**
3119 ************************************************************************************************************************
3120 *   Gfx9Lib::HwlComputeSlicePipeBankXor
3121 *
3122 *   @brief
3123 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
3124 *
3125 *   @return
3126 *       PipeBankXor value
3127 ************************************************************************************************************************
3128 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const3129 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor(
3130     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
3131     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut) const
3132 {
3133     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3134     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3135     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3136 
3137     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3138     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3139 
3140     pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits));
3141 
3142     return ADDR_OK;
3143 }
3144 
3145 /**
3146 ************************************************************************************************************************
3147 *   Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern
3148 *
3149 *   @brief
3150 *       Compute sub resource offset to support swizzle pattern
3151 *
3152 *   @return
3153 *       Offset
3154 ************************************************************************************************************************
3155 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const3156 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
3157     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
3158     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut) const
3159 {
3160     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
3161 
3162     UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode);
3163     UINT_32 pipeBits       = GetPipeXorBits(macroBlockBits);
3164     UINT_32 bankBits       = GetBankXorBits(macroBlockBits);
3165     UINT_32 pipeXor        = ReverseBitVector(pIn->slice, pipeBits);
3166     UINT_32 bankXor        = ReverseBitVector(pIn->slice >> pipeBits, bankBits);
3167     UINT_32 pipeBankXor    = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2;
3168 
3169     pOut->offset = pIn->slice * pIn->sliceSize +
3170                    pIn->macroBlockOffset +
3171                    (pIn->mipTailOffset ^ pipeBankXor) -
3172                    static_cast<UINT_64>(pipeBankXor);
3173     return ADDR_OK;
3174 }
3175 
3176 /**
3177 ************************************************************************************************************************
3178 *   Gfx9Lib::ValidateNonSwModeParams
3179 *
3180 *   @brief
3181 *       Validate compute surface info params except swizzle mode
3182 *
3183 *   @return
3184 *       TRUE if parameters are valid, FALSE otherwise
3185 ************************************************************************************************************************
3186 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3187 BOOL_32 Gfx9Lib::ValidateNonSwModeParams(
3188     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3189 {
3190     BOOL_32 valid = TRUE;
3191 
3192     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
3193     {
3194         ADDR_ASSERT_ALWAYS();
3195         valid = FALSE;
3196     }
3197 
3198     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
3199     {
3200         ADDR_ASSERT_ALWAYS();
3201         valid = FALSE;
3202     }
3203 
3204     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3205     const BOOL_32 msaa   = (pIn->numFrags > 1);
3206     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3207 
3208     const AddrResourceType rsrcType = pIn->resourceType;
3209     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3210     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3211     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3212 
3213     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3214     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3215     const BOOL_32             display = flags.display || flags.rotated;
3216     const BOOL_32             stereo  = flags.qbStereo;
3217     const BOOL_32             fmask   = flags.fmask;
3218 
3219     // Resource type check
3220     if (tex1d)
3221     {
3222         if (msaa || zbuffer || display || stereo || isBc || fmask)
3223         {
3224             ADDR_ASSERT_ALWAYS();
3225             valid = FALSE;
3226         }
3227     }
3228     else if (tex2d)
3229     {
3230         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
3231         {
3232             ADDR_ASSERT_ALWAYS();
3233             valid = FALSE;
3234         }
3235     }
3236     else if (tex3d)
3237     {
3238         if (msaa || zbuffer || display || stereo || fmask)
3239         {
3240             ADDR_ASSERT_ALWAYS();
3241             valid = FALSE;
3242         }
3243     }
3244     else
3245     {
3246         ADDR_ASSERT_ALWAYS();
3247         valid = FALSE;
3248     }
3249 
3250     return valid;
3251 }
3252 
3253 /**
3254 ************************************************************************************************************************
3255 *   Gfx9Lib::ValidateSwModeParams
3256 *
3257 *   @brief
3258 *       Validate compute surface info related to swizzle mode
3259 *
3260 *   @return
3261 *       TRUE if parameters are valid, FALSE otherwise
3262 ************************************************************************************************************************
3263 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3264 BOOL_32 Gfx9Lib::ValidateSwModeParams(
3265     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3266 {
3267     BOOL_32 valid = TRUE;
3268 
3269     if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || (IsValidSwMode(pIn->swizzleMode) == FALSE))
3270     {
3271         ADDR_ASSERT_ALWAYS();
3272         valid = FALSE;
3273     }
3274 
3275     const BOOL_32 mipmap = (pIn->numMipLevels > 1);
3276     const BOOL_32 msaa   = (pIn->numFrags > 1);
3277     const BOOL_32 isBc   = ElemLib::IsBlockCompressed(pIn->format);
3278     const BOOL_32 is422  = ElemLib::IsMacroPixelPacked(pIn->format);
3279 
3280     const AddrResourceType rsrcType = pIn->resourceType;
3281     const BOOL_32          tex3d    = IsTex3d(rsrcType);
3282     const BOOL_32          tex2d    = IsTex2d(rsrcType);
3283     const BOOL_32          tex1d    = IsTex1d(rsrcType);
3284 
3285     const AddrSwizzleMode  swizzle     = pIn->swizzleMode;
3286     const BOOL_32          linear      = IsLinear(swizzle);
3287     const BOOL_32          blk256B     = IsBlock256b(swizzle);
3288     const BOOL_32          isNonPrtXor = IsNonPrtXor(swizzle);
3289 
3290     const ADDR2_SURFACE_FLAGS flags   = pIn->flags;
3291     const BOOL_32             zbuffer = flags.depth || flags.stencil;
3292     const BOOL_32             color   = flags.color;
3293     const BOOL_32             texture = flags.texture;
3294     const BOOL_32             display = flags.display || flags.rotated;
3295     const BOOL_32             prt     = flags.prt;
3296     const BOOL_32             fmask   = flags.fmask;
3297 
3298     const BOOL_32             thin3d  = tex3d && flags.view3dAs2dArray;
3299     const BOOL_32             zMaxMip = tex3d && mipmap &&
3300                                         (pIn->numSlices >= pIn->width) && (pIn->numSlices >= pIn->height);
3301 
3302     // Misc check
3303     if (msaa && (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
3304     {
3305         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3306         ADDR_ASSERT_ALWAYS();
3307         valid = FALSE;
3308     }
3309 
3310     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
3311     {
3312         ADDR_ASSERT_ALWAYS();
3313         valid = FALSE;
3314     }
3315 
3316     if ((pIn->bpp == 96) && (linear == FALSE))
3317     {
3318         ADDR_ASSERT_ALWAYS();
3319         valid = FALSE;
3320     }
3321 
3322     if (prt && isNonPrtXor)
3323     {
3324         ADDR_ASSERT_ALWAYS();
3325         valid = FALSE;
3326     }
3327 
3328     // Resource type check
3329     if (tex1d)
3330     {
3331         if (linear == FALSE)
3332         {
3333             ADDR_ASSERT_ALWAYS();
3334             valid = FALSE;
3335         }
3336     }
3337 
3338     // Swizzle type check
3339     if (linear)
3340     {
3341         if (((tex1d == FALSE) && prt) || zbuffer || msaa || (pIn->bpp == 0) ||
3342             ((pIn->bpp % 8) != 0) || (isBc && texture) || fmask)
3343         {
3344             ADDR_ASSERT_ALWAYS();
3345             valid = FALSE;
3346         }
3347     }
3348     else if (IsZOrderSwizzle(swizzle))
3349     {
3350         if ((color && msaa) || thin3d || isBc || is422 || (tex2d && (pIn->bpp > 64)) || (msaa && (pIn->bpp > 32)))
3351         {
3352             ADDR_ASSERT_ALWAYS();
3353             valid = FALSE;
3354         }
3355     }
3356     else if (IsStandardSwizzle(swizzle))
3357     {
3358         if (zbuffer || thin3d || (tex3d && (pIn->bpp == 128) && color) || fmask)
3359         {
3360             ADDR_ASSERT_ALWAYS();
3361             valid = FALSE;
3362         }
3363     }
3364     else if (IsDisplaySwizzle(swizzle))
3365     {
3366         if (zbuffer || (prt && tex3d) || fmask || zMaxMip)
3367         {
3368             ADDR_ASSERT_ALWAYS();
3369             valid = FALSE;
3370         }
3371     }
3372     else if (IsRotateSwizzle(swizzle))
3373     {
3374         if (zbuffer || (pIn->bpp > 64) || tex3d || isBc || fmask)
3375         {
3376             ADDR_ASSERT_ALWAYS();
3377             valid = FALSE;
3378         }
3379     }
3380     else
3381     {
3382         ADDR_ASSERT_ALWAYS();
3383         valid = FALSE;
3384     }
3385 
3386     // Block type check
3387     if (blk256B)
3388     {
3389         if (prt || zbuffer || tex3d || mipmap || msaa)
3390         {
3391             ADDR_ASSERT_ALWAYS();
3392             valid = FALSE;
3393         }
3394     }
3395 
3396     return valid;
3397 }
3398 
3399 /**
3400 ************************************************************************************************************************
3401 *   Gfx9Lib::HwlComputeSurfaceInfoSanityCheck
3402 *
3403 *   @brief
3404 *       Compute surface info sanity check
3405 *
3406 *   @return
3407 *       ADDR_OK if parameters are valid, ADDR_INVALIDPARAMS otherwise
3408 ************************************************************************************************************************
3409 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const3410 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck(
3411     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
3412 {
3413     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
3414 }
3415 
3416 /**
3417 ************************************************************************************************************************
3418 *   Gfx9Lib::HwlGetPreferredSurfaceSetting
3419 *
3420 *   @brief
3421 *       Internal function to get suggested surface information for client to use
3422 *
3423 *   @return
3424 *       ADDR_E_RETURNCODE
3425 ************************************************************************************************************************
3426 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const3427 ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
3428     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
3429     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut) const
3430 {
3431     ADDR_E_RETURNCODE returnCode = ADDR_INVALIDPARAMS;
3432     ElemLib*          pElemLib   = GetElemLib();
3433 
3434     UINT_32 bpp        = pIn->bpp;
3435     UINT_32 width      = Max(pIn->width, 1u);
3436     UINT_32 height     = Max(pIn->height, 1u);
3437     UINT_32 numSamples = Max(pIn->numSamples, 1u);
3438     UINT_32 numFrags   = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
3439 
3440     if (pIn->flags.fmask)
3441     {
3442         bpp                = GetFmaskBpp(numSamples, numFrags);
3443         numFrags           = 1;
3444         numSamples         = 1;
3445         pOut->resourceType = ADDR_RSRC_TEX_2D;
3446     }
3447     else
3448     {
3449         // Set format to INVALID will skip this conversion
3450         if (pIn->format != ADDR_FMT_INVALID)
3451         {
3452             UINT_32 expandX, expandY;
3453 
3454             // Don't care for this case
3455             ElemMode elemMode = ADDR_UNCOMPRESSED;
3456 
3457             // Get compression/expansion factors and element mode which indicates compression/expansion
3458             bpp = pElemLib->GetBitsPerPixel(pIn->format,
3459                                             &elemMode,
3460                                             &expandX,
3461                                             &expandY);
3462 
3463             UINT_32 basePitch = 0;
3464             GetElemLib()->AdjustSurfaceInfo(elemMode,
3465                                             expandX,
3466                                             expandY,
3467                                             &bpp,
3468                                             &basePitch,
3469                                             &width,
3470                                             &height);
3471         }
3472 
3473         // The output may get changed for volume(3D) texture resource in future
3474         pOut->resourceType = pIn->resourceType;
3475     }
3476 
3477     const UINT_32 numSlices    = Max(pIn->numSlices, 1u);
3478     const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
3479     const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
3480     const BOOL_32 displayRsrc  = pIn->flags.display || pIn->flags.rotated;
3481 
3482     // Pre sanity check on non swizzle mode parameters
3483     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
3484     localIn.flags        = pIn->flags;
3485     localIn.resourceType = pOut->resourceType;
3486     localIn.format       = pIn->format;
3487     localIn.bpp          = bpp;
3488     localIn.width        = width;
3489     localIn.height       = height;
3490     localIn.numSlices    = numSlices;
3491     localIn.numMipLevels = numMipLevels;
3492     localIn.numSamples   = numSamples;
3493     localIn.numFrags     = numFrags;
3494 
3495     if (ValidateNonSwModeParams(&localIn))
3496     {
3497         // Forbid swizzle mode(s) by client setting
3498         ADDR2_SWMODE_SET allowedSwModeSet = {};
3499         allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask;
3500         allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx9Blk256BSwModeMask;
3501         allowedSwModeSet.value |=
3502             pIn->forbiddenBlock.macroThin4KB ? 0 :
3503             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask);
3504         allowedSwModeSet.value |=
3505             pIn->forbiddenBlock.macroThick4KB ? 0 :
3506             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick4KBSwModeMask : 0);
3507         allowedSwModeSet.value |=
3508             pIn->forbiddenBlock.macroThin64KB ? 0 :
3509             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask);
3510         allowedSwModeSet.value |=
3511             pIn->forbiddenBlock.macroThick64KB ? 0 :
3512             ((pOut->resourceType == ADDR_RSRC_TEX_3D) ? Gfx9Rsrc3dThick64KBSwModeMask : 0);
3513 
3514         if (pIn->preferredSwSet.value != 0)
3515         {
3516             allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask;
3517             allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask;
3518             allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask;
3519             allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask;
3520         }
3521 
3522         if (pIn->noXor)
3523         {
3524             allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3525         }
3526 
3527         if (pIn->maxAlign > 0)
3528         {
3529             if (pIn->maxAlign < Size64K)
3530             {
3531                 allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask;
3532             }
3533 
3534             if (pIn->maxAlign < Size4K)
3535             {
3536                 allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask;
3537             }
3538 
3539             if (pIn->maxAlign < Size256)
3540             {
3541                 allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3542             }
3543         }
3544 
3545         // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
3546         switch (pOut->resourceType)
3547         {
3548             case ADDR_RSRC_TEX_1D:
3549                 allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask;
3550                 break;
3551 
3552             case ADDR_RSRC_TEX_2D:
3553                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask;
3554 
3555                 if (bpp > 64)
3556                 {
3557                     allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask);
3558                 }
3559                 break;
3560 
3561             case ADDR_RSRC_TEX_3D:
3562                 allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask;
3563 
3564                 if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height))
3565                 {
3566                     // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap
3567                     // When depth (Z) is the maximum dimension then must use one of the SW_*_S
3568                     // or SW_*_Z modes if mipmapping is desired on a 3D surface
3569                     allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask;
3570                 }
3571 
3572                 if ((bpp == 128) && pIn->flags.color)
3573                 {
3574                     allowedSwModeSet.value &= ~Gfx9StandardSwModeMask;
3575                 }
3576 
3577                 if (pIn->flags.view3dAs2dArray)
3578                 {
3579                     allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask;
3580                 }
3581                 break;
3582 
3583             default:
3584                 ADDR_ASSERT_ALWAYS();
3585                 allowedSwModeSet.value = 0;
3586                 break;
3587         }
3588 
3589         if (pIn->format == ADDR_FMT_32_32_32)
3590         {
3591             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3592         }
3593 
3594         if (ElemLib::IsBlockCompressed(pIn->format))
3595         {
3596             if (pIn->flags.texture)
3597             {
3598                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask;
3599             }
3600             else
3601             {
3602                 allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask;
3603             }
3604         }
3605 
3606         if (ElemLib::IsMacroPixelPacked(pIn->format) ||
3607             (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
3608         {
3609             allowedSwModeSet.value &= ~Gfx9ZSwModeMask;
3610         }
3611 
3612         if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil)
3613         {
3614             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3615 
3616             if (pIn->flags.noMetadata == FALSE)
3617             {
3618                 if (pIn->flags.depth &&
3619                     pIn->flags.texture &&
3620                     (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2))))
3621                 {
3622                     // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane
3623                     // equation from wrong address within memory range a tile covered and use the
3624                     // garbage data for compressed Z reading which finally leads to corruption.
3625                     allowedSwModeSet.value &= ~Gfx9XorSwModeMask;
3626                 }
3627 
3628                 if (m_settings.htileCacheRbConflict &&
3629                     (pIn->flags.depth || pIn->flags.stencil) &&
3630                     (numSlices > 1) &&
3631                     (pIn->flags.metaRbUnaligned == FALSE) &&
3632                     (pIn->flags.metaPipeUnaligned == FALSE))
3633                 {
3634                     // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency
3635                     allowedSwModeSet.value &= ~Gfx9XSwModeMask;
3636                 }
3637             }
3638         }
3639 
3640         if (msaa)
3641         {
3642             allowedSwModeSet.value &= Gfx9MsaaSwModeMask;
3643         }
3644 
3645         if ((numFrags > 1) &&
3646             (Size4K < (m_pipeInterleaveBytes * numFrags)))
3647         {
3648             // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
3649             allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask;
3650         }
3651 
3652         if (numMipLevels > 1)
3653         {
3654             allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask;
3655         }
3656 
3657         if (displayRsrc)
3658         {
3659             if (m_settings.isDce12)
3660             {
3661                 allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask;
3662             }
3663             else if (m_settings.isDcn1)
3664             {
3665                 allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask;
3666             }
3667             else if (m_settings.isDcn2)
3668             {
3669                 allowedSwModeSet.value &= (bpp == 64) ? Dcn2Bpp64SwModeMask : Dcn2NonBpp64SwModeMask;
3670             }
3671             else
3672             {
3673                 ADDR_NOT_IMPLEMENTED();
3674             }
3675         }
3676 
3677         if (allowedSwModeSet.value != 0)
3678         {
3679 #if DEBUG
3680             // Post sanity check, at least AddrLib should accept the output generated by its own
3681             UINT_32 validateSwModeSet = allowedSwModeSet.value;
3682 
3683             for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3684             {
3685                 if (validateSwModeSet & 1)
3686                 {
3687                     localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3688                     ADDR_ASSERT(ValidateSwModeParams(&localIn));
3689                 }
3690 
3691                 validateSwModeSet >>= 1;
3692             }
3693 #endif
3694 
3695             pOut->validSwModeSet = allowedSwModeSet;
3696             pOut->canXor         = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? TRUE : FALSE;
3697             pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3698             pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3699 
3700             pOut->clientPreferredSwSet = pIn->preferredSwSet;
3701 
3702             if (pOut->clientPreferredSwSet.value == 0)
3703             {
3704                 pOut->clientPreferredSwSet.value = AddrSwSetAll;
3705             }
3706 
3707             // Apply optional restrictions
3708             if (pIn->flags.needEquation)
3709             {
3710                 UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3711                                                                     ADDR_MAX_LEGACY_EQUATION_COMP;
3712                 FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3713             }
3714 
3715             if (allowedSwModeSet.value == Gfx9LinearSwModeMask)
3716             {
3717                 pOut->swizzleMode = ADDR_SW_LINEAR;
3718             }
3719             else
3720             {
3721                 const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3722 
3723                 if ((height > 1) && (computeMinSize == FALSE))
3724                 {
3725                     // Always ignore linear swizzle mode if:
3726                     // 1. This is a (2D/3D) resource with height > 1
3727                     // 2. Client doesn't require computing minimize size
3728                     allowedSwModeSet.swLinear = 0;
3729                 }
3730 
3731                 ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3732 
3733                 // Determine block size if there are 2 or more block type candidates
3734                 if (IsPow2(allowedBlockSet.value) == FALSE)
3735                 {
3736                     AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3737 
3738                     swMode[AddrBlockLinear]   = ADDR_SW_LINEAR;
3739                     swMode[AddrBlockMicro]    = ADDR_SW_256B_D;
3740                     swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_D;
3741                     swMode[AddrBlockThin64KB] = ADDR_SW_64KB_D;
3742 
3743                     if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3744                     {
3745                         swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3746                         swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3747                     }
3748 
3749                     UINT_64 padSize[AddrBlockMaxTiledType] = {};
3750 
3751                     const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3752                     const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3753                     const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3754                     UINT_32       minSizeBlk         = AddrBlockMicro;
3755                     UINT_64       minSize            = 0;
3756 
3757                     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3758 
3759                     for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3760                     {
3761                         if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3762                         {
3763                             localIn.swizzleMode = swMode[i];
3764 
3765                             if (localIn.swizzleMode == ADDR_SW_LINEAR)
3766                             {
3767                                 returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3768                             }
3769                             else
3770                             {
3771                                 returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3772                             }
3773 
3774                             if (returnCode == ADDR_OK)
3775                             {
3776                                 padSize[i] = localOut.surfSize;
3777 
3778                                 if ((minSize == 0) ||
3779                                     Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi))
3780                                 {
3781                                     minSize    = padSize[i];
3782                                     minSizeBlk = i;
3783                                 }
3784                             }
3785                             else
3786                             {
3787                                 ADDR_ASSERT_ALWAYS();
3788                                 break;
3789                             }
3790                         }
3791                     }
3792 
3793                     if (pIn->memoryBudget > 1.0)
3794                     {
3795                         // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3796                         // smaller-block type again in coming loop
3797                         switch (minSizeBlk)
3798                         {
3799                             case AddrBlockThick64KB:
3800                                 allowedBlockSet.macroThin64KB = 0;
3801                             case AddrBlockThin64KB:
3802                                 allowedBlockSet.macroThick4KB = 0;
3803                             case AddrBlockThick4KB:
3804                                 allowedBlockSet.macroThin4KB = 0;
3805                             case AddrBlockThin4KB:
3806                                 allowedBlockSet.micro  = 0;
3807                             case AddrBlockMicro:
3808                                 allowedBlockSet.linear = 0;
3809                             case AddrBlockLinear:
3810                                 break;
3811 
3812                             default:
3813                                 ADDR_ASSERT_ALWAYS();
3814                                 break;
3815                         }
3816 
3817                         for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3818                         {
3819                             if ((i != minSizeBlk) &&
3820                                 Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3821                             {
3822                                 if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE)
3823                                 {
3824                                     // Clear the block type if the memory waste is unacceptable
3825                                     allowedBlockSet.value &= ~(1u << (i - 1));
3826                                 }
3827                             }
3828                         }
3829 
3830                         // Remove linear block type if 2 or more block types are allowed
3831                         if (IsPow2(allowedBlockSet.value) == FALSE)
3832                         {
3833                             allowedBlockSet.linear = 0;
3834                         }
3835 
3836                         // Select the biggest allowed block type
3837                         minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3838 
3839                         if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3840                         {
3841                             minSizeBlk = AddrBlockLinear;
3842                         }
3843                     }
3844 
3845                     switch (minSizeBlk)
3846                     {
3847                         case AddrBlockLinear:
3848                             allowedSwModeSet.value &= Gfx9LinearSwModeMask;
3849                             break;
3850 
3851                         case AddrBlockMicro:
3852                             ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3853                             allowedSwModeSet.value &= Gfx9Blk256BSwModeMask;
3854                             break;
3855 
3856                         case AddrBlockThin4KB:
3857                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3858                                                       Gfx9Rsrc3dThin4KBSwModeMask : Gfx9Blk4KBSwModeMask;
3859                             break;
3860 
3861                         case AddrBlockThick4KB:
3862                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3863                             allowedSwModeSet.value &= Gfx9Rsrc3dThick4KBSwModeMask;
3864                             break;
3865 
3866                         case AddrBlockThin64KB:
3867                             allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3868                                                       Gfx9Rsrc3dThin64KBSwModeMask : Gfx9Blk64KBSwModeMask;
3869                             break;
3870 
3871                         case AddrBlockThick64KB:
3872                             ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3873                             allowedSwModeSet.value &= Gfx9Rsrc3dThick64KBSwModeMask;
3874                             break;
3875 
3876                         default:
3877                             ADDR_ASSERT_ALWAYS();
3878                             allowedSwModeSet.value = 0;
3879                             break;
3880                     }
3881                 }
3882 
3883                 // Block type should be determined.
3884                 ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3885 
3886                 ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3887 
3888                 // Determine swizzle type if there are 2 or more swizzle type candidates
3889                 if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3890                 {
3891                     if (ElemLib::IsBlockCompressed(pIn->format))
3892                     {
3893                         if (allowedSwSet.sw_D)
3894                         {
3895                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3896                         }
3897                         else
3898                         {
3899                             ADDR_ASSERT(allowedSwSet.sw_S);
3900                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3901                         }
3902                     }
3903                     else if (ElemLib::IsMacroPixelPacked(pIn->format))
3904                     {
3905                         if (allowedSwSet.sw_S)
3906                         {
3907                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3908                         }
3909                         else if (allowedSwSet.sw_D)
3910                         {
3911                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3912                         }
3913                         else
3914                         {
3915                             ADDR_ASSERT(allowedSwSet.sw_R);
3916                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3917                         }
3918                     }
3919                     else if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3920                     {
3921                         if (pIn->flags.color && allowedSwSet.sw_D)
3922                         {
3923                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3924                         }
3925                         else if (allowedSwSet.sw_Z)
3926                         {
3927                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3928                         }
3929                         else
3930                         {
3931                             ADDR_ASSERT(allowedSwSet.sw_S);
3932                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3933                         }
3934                     }
3935                     else
3936                     {
3937                         if (pIn->flags.rotated && allowedSwSet.sw_R)
3938                         {
3939                             allowedSwModeSet.value &= Gfx9RotateSwModeMask;
3940                         }
3941                         else if (allowedSwSet.sw_D)
3942                         {
3943                             allowedSwModeSet.value &= Gfx9DisplaySwModeMask;
3944                         }
3945                         else if (allowedSwSet.sw_S)
3946                         {
3947                             allowedSwModeSet.value &= Gfx9StandardSwModeMask;
3948                         }
3949                         else
3950                         {
3951                             ADDR_ASSERT(allowedSwSet.sw_Z);
3952                             allowedSwModeSet.value &= Gfx9ZSwModeMask;
3953                         }
3954                     }
3955 
3956                     // Swizzle type should be determined.
3957                     ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3958                 }
3959 
3960                 // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type + swizzle
3961                 // type combination. For example, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3962                 // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3963                 pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3964             }
3965 
3966             returnCode = ADDR_OK;
3967         }
3968         else
3969         {
3970             // Invalid combination...
3971             ADDR_ASSERT_ALWAYS();
3972         }
3973     }
3974     else
3975     {
3976         // Invalid combination...
3977         ADDR_ASSERT_ALWAYS();
3978     }
3979 
3980     return returnCode;
3981 }
3982 
3983 /**
3984 ************************************************************************************************************************
3985 *   Gfx9Lib::ComputeStereoInfo
3986 *
3987 *   @brief
3988 *       Compute height alignment and right eye pipeBankXor for stereo surface
3989 *
3990 *   @return
3991 *       Error code
3992 *
3993 ************************************************************************************************************************
3994 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 * pHeightAlign) const3995 ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
3996     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
3997     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut,
3998     UINT_32*                                pHeightAlign
3999     ) const
4000 {
4001     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4002 
4003     UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut);
4004 
4005     if (eqIndex < m_numEquations)
4006     {
4007         if (IsXor(pIn->swizzleMode))
4008         {
4009             const UINT_32        blkSizeLog2       = GetBlockSizeLog2(pIn->swizzleMode);
4010             const UINT_32        numPipeBits       = GetPipeXorBits(blkSizeLog2);
4011             const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
4012             const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
4013             const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
4014             const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
4015 
4016             ADDR_ASSERT(maxYCoordBlock256 ==
4017                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], Log2Size256, 1));
4018 
4019             const UINT_32 maxYCoordInBaseEquation =
4020                 (blkSizeLog2 - Log2Size256) / 2 + maxYCoordBlock256;
4021 
4022             ADDR_ASSERT(maxYCoordInBaseEquation ==
4023                         GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1));
4024 
4025             const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits;
4026 
4027             ADDR_ASSERT(maxYCoordInPipeXor ==
4028                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1));
4029 
4030             const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ?
4031                                                0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits;
4032 
4033             ADDR_ASSERT(maxYCoordInBankXor ==
4034                         GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1));
4035 
4036             const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor);
4037 
4038             if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation)
4039             {
4040                 *pHeightAlign = 1u << maxYCoordInPipeBankXor;
4041 
4042                 if (pOut->pStereoInfo != NULL)
4043                 {
4044                     pOut->pStereoInfo->rightSwizzle = 0;
4045 
4046                     if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0)
4047                     {
4048                         if (maxYCoordInPipeXor == maxYCoordInPipeBankXor)
4049                         {
4050                             pOut->pStereoInfo->rightSwizzle |= (1u << 1);
4051                         }
4052 
4053                         if (maxYCoordInBankXor == maxYCoordInPipeBankXor)
4054                         {
4055                             pOut->pStereoInfo->rightSwizzle |=
4056                                 1u << ((numPipeBits % 2) ? numPipeBits : numPipeBits + 1);
4057                         }
4058 
4059                         ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle ==
4060                                     GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2],
4061                                                        numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor));
4062                     }
4063                 }
4064             }
4065         }
4066     }
4067     else
4068     {
4069         ADDR_ASSERT_ALWAYS();
4070         returnCode = ADDR_ERROR;
4071     }
4072 
4073     return returnCode;
4074 }
4075 
4076 /**
4077 ************************************************************************************************************************
4078 *   Gfx9Lib::HwlComputeSurfaceInfoTiled
4079 *
4080 *   @brief
4081 *       Internal function to calculate alignment for tiled surface
4082 *
4083 *   @return
4084 *       ADDR_E_RETURNCODE
4085 ************************************************************************************************************************
4086 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4087 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled(
4088      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4089      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4090      ) const
4091 {
4092     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
4093                                                                 &pOut->blockHeight,
4094                                                                 &pOut->blockSlices,
4095                                                                 pIn->bpp,
4096                                                                 pIn->numFrags,
4097                                                                 pIn->resourceType,
4098                                                                 pIn->swizzleMode);
4099 
4100     if (returnCode == ADDR_OK)
4101     {
4102         UINT_32 pitchAlignInElement = pOut->blockWidth;
4103 
4104         if ((IsTex2d(pIn->resourceType) == TRUE) &&
4105             (pIn->flags.display || pIn->flags.rotated) &&
4106             (pIn->numMipLevels <= 1) &&
4107             (pIn->numSamples <= 1) &&
4108             (pIn->numFrags <= 1))
4109         {
4110             // Display engine needs pitch align to be at least 32 pixels.
4111             pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32);
4112         }
4113 
4114         pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement);
4115 
4116         if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0))
4117         {
4118             if ((pIn->pitchInElement % pitchAlignInElement) != 0)
4119             {
4120                 returnCode = ADDR_INVALIDPARAMS;
4121             }
4122             else if (pIn->pitchInElement < pOut->pitch)
4123             {
4124                 returnCode = ADDR_INVALIDPARAMS;
4125             }
4126             else
4127             {
4128                 pOut->pitch = pIn->pitchInElement;
4129             }
4130         }
4131 
4132         UINT_32 heightAlign = 0;
4133 
4134         if (pIn->flags.qbStereo)
4135         {
4136             returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign);
4137         }
4138 
4139         if (returnCode == ADDR_OK)
4140         {
4141             pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);
4142 
4143             if (heightAlign > 1)
4144             {
4145                 pOut->height = PowTwoAlign(pOut->height, heightAlign);
4146             }
4147 
4148             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
4149 
4150             pOut->epitchIsHeight   = FALSE;
4151             pOut->mipChainInTail   = FALSE;
4152             pOut->firstMipIdInTail = pIn->numMipLevels;
4153 
4154             pOut->mipChainPitch    = pOut->pitch;
4155             pOut->mipChainHeight   = pOut->height;
4156             pOut->mipChainSlice    = pOut->numSlices;
4157 
4158             if (pIn->numMipLevels > 1)
4159             {
4160                 pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType,
4161                                                          pIn->swizzleMode,
4162                                                          pIn->bpp,
4163                                                          pIn->width,
4164                                                          pIn->height,
4165                                                          pIn->numSlices,
4166                                                          pOut->blockWidth,
4167                                                          pOut->blockHeight,
4168                                                          pOut->blockSlices,
4169                                                          pIn->numMipLevels,
4170                                                          pOut->pMipInfo);
4171 
4172                 const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1);
4173 
4174                 if (endingMipId == 0)
4175                 {
4176                     const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,
4177                                                            pIn->swizzleMode,
4178                                                            pOut->blockWidth,
4179                                                            pOut->blockHeight,
4180                                                            pOut->blockSlices);
4181 
4182                     pOut->epitchIsHeight = TRUE;
4183                     pOut->pitch          = tailMaxDim.w;
4184                     pOut->height         = tailMaxDim.h;
4185                     pOut->numSlices      = IsThick(pIn->resourceType, pIn->swizzleMode) ?
4186                                            tailMaxDim.d : pIn->numSlices;
4187                     pOut->mipChainInTail = TRUE;
4188                 }
4189                 else
4190                 {
4191                     UINT_32 mip0WidthInBlk  = pOut->pitch  / pOut->blockWidth;
4192                     UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight;
4193 
4194                     AddrMajorMode majorMode = GetMajorMode(pIn->resourceType,
4195                                                            pIn->swizzleMode,
4196                                                            mip0WidthInBlk,
4197                                                            mip0HeightInBlk,
4198                                                            pOut->numSlices / pOut->blockSlices);
4199                     if (majorMode == ADDR_MAJOR_Y)
4200                     {
4201                         UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk);
4202 
4203                         if ((mip1WidthInBlk == 1) && (endingMipId > 2))
4204                         {
4205                             mip1WidthInBlk++;
4206                         }
4207 
4208                         pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth);
4209 
4210                         pOut->epitchIsHeight = FALSE;
4211                     }
4212                     else
4213                     {
4214                         UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk);
4215 
4216                         if ((mip1HeightInBlk == 1) && (endingMipId > 2))
4217                         {
4218                             mip1HeightInBlk++;
4219                         }
4220 
4221                         pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight);
4222 
4223                         pOut->epitchIsHeight = TRUE;
4224                     }
4225                 }
4226 
4227                 if (pOut->pMipInfo != NULL)
4228                 {
4229                     UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3);
4230 
4231                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4232                     {
4233                         Dim3d   mipStartPos          = {0};
4234                         UINT_32 mipTailOffsetInBytes = 0;
4235 
4236                         mipStartPos = GetMipStartPos(pIn->resourceType,
4237                                                      pIn->swizzleMode,
4238                                                      pOut->pitch,
4239                                                      pOut->height,
4240                                                      pOut->numSlices,
4241                                                      pOut->blockWidth,
4242                                                      pOut->blockHeight,
4243                                                      pOut->blockSlices,
4244                                                      i,
4245                                                      elementBytesLog2,
4246                                                      &mipTailOffsetInBytes);
4247 
4248                         UINT_32 pitchInBlock     =
4249                             pOut->mipChainPitch / pOut->blockWidth;
4250                         UINT_32 sliceInBlock     =
4251                             (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock;
4252                         UINT_64 blockIndex       =
4253                             mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w;
4254                         UINT_64 macroBlockOffset =
4255                             blockIndex << GetBlockSizeLog2(pIn->swizzleMode);
4256 
4257                         pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset;
4258                         pOut->pMipInfo[i].mipTailOffset    = mipTailOffsetInBytes;
4259                     }
4260                 }
4261             }
4262             else if (pOut->pMipInfo != NULL)
4263             {
4264                 pOut->pMipInfo[0].pitch  = pOut->pitch;
4265                 pOut->pMipInfo[0].height = pOut->height;
4266                 pOut->pMipInfo[0].depth  = IsTex3d(pIn->resourceType)? pOut->numSlices : 1;
4267                 pOut->pMipInfo[0].offset = 0;
4268             }
4269 
4270             pOut->sliceSize = static_cast<UINT_64>(pOut->mipChainPitch) * pOut->mipChainHeight *
4271                               (pIn->bpp >> 3) * pIn->numFrags;
4272             pOut->surfSize  = pOut->sliceSize * pOut->mipChainSlice;
4273             pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode);
4274 
4275             if ((IsBlock256b(pIn->swizzleMode) == FALSE) &&
4276                 (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) &&
4277                 (pIn->flags.texture == TRUE) &&
4278                 (pIn->flags.noMetadata == FALSE) &&
4279                 (pIn->flags.metaPipeUnaligned == FALSE))
4280             {
4281                 // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC...
4282                 // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will
4283                 // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of
4284                 // them, which may cause invalid metadata to be fetched.
4285                 pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes * m_se);
4286             }
4287 
4288             if (pIn->flags.prt)
4289             {
4290                 pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment);
4291             }
4292         }
4293     }
4294 
4295     return returnCode;
4296 }
4297 
4298 /**
4299 ************************************************************************************************************************
4300 *   Gfx9Lib::HwlComputeSurfaceInfoLinear
4301 *
4302 *   @brief
4303 *       Internal function to calculate alignment for linear surface
4304 *
4305 *   @return
4306 *       ADDR_E_RETURNCODE
4307 ************************************************************************************************************************
4308 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4309 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear(
4310      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4311      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4312      ) const
4313 {
4314     ADDR_E_RETURNCODE returnCode   = ADDR_OK;
4315     UINT_32           pitch        = 0;
4316     UINT_32           actualHeight = 0;
4317     UINT_32           elementBytes = pIn->bpp >> 3;
4318     const UINT_32     alignment    = pIn->flags.prt ? PrtAlignment : 256;
4319 
4320     if (IsTex1d(pIn->resourceType))
4321     {
4322         if (pIn->height > 1)
4323         {
4324             returnCode = ADDR_INVALIDPARAMS;
4325         }
4326         else
4327         {
4328             const UINT_32 pitchAlignInElement = alignment / elementBytes;
4329 
4330             pitch        = PowTwoAlign(pIn->width, pitchAlignInElement);
4331             actualHeight = pIn->numMipLevels;
4332 
4333             if (pIn->flags.prt == FALSE)
4334             {
4335                 returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
4336                                                         &pitch, &actualHeight);
4337             }
4338 
4339             if (returnCode == ADDR_OK)
4340             {
4341                 if (pOut->pMipInfo != NULL)
4342                 {
4343                     for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
4344                     {
4345                         pOut->pMipInfo[i].offset = pitch * elementBytes * i;
4346                         pOut->pMipInfo[i].pitch  = pitch;
4347                         pOut->pMipInfo[i].height = 1;
4348                         pOut->pMipInfo[i].depth  = 1;
4349                     }
4350                 }
4351             }
4352         }
4353     }
4354     else
4355     {
4356         returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo);
4357     }
4358 
4359     if ((pitch == 0) || (actualHeight == 0))
4360     {
4361         returnCode = ADDR_INVALIDPARAMS;
4362     }
4363 
4364     if (returnCode == ADDR_OK)
4365     {
4366         pOut->pitch          = pitch;
4367         pOut->height         = pIn->height;
4368         pOut->numSlices      = pIn->numSlices;
4369         pOut->mipChainPitch  = pitch;
4370         pOut->mipChainHeight = actualHeight;
4371         pOut->mipChainSlice  = pOut->numSlices;
4372         pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE;
4373         pOut->sliceSize      = static_cast<UINT_64>(pOut->pitch) * actualHeight * elementBytes;
4374         pOut->surfSize       = pOut->sliceSize * pOut->numSlices;
4375         pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment;
4376         pOut->blockWidth     = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4377         pOut->blockHeight    = 1;
4378         pOut->blockSlices    = 1;
4379     }
4380 
4381     // Post calculation validate
4382     ADDR_ASSERT(pOut->sliceSize > 0);
4383 
4384     return returnCode;
4385 }
4386 
4387 /**
4388 ************************************************************************************************************************
4389 *   Gfx9Lib::GetMipChainInfo
4390 *
4391 *   @brief
4392 *       Internal function to get out information about mip chain
4393 *
4394 *   @return
4395 *       Smaller value between Id of first mip fitted in mip tail and max Id of mip being created
4396 ************************************************************************************************************************
4397 */
GetMipChainInfo(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 bpp,UINT_32 mip0Width,UINT_32 mip0Height,UINT_32 mip0Depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 numMipLevel,ADDR2_MIP_INFO * pMipInfo) const4398 UINT_32 Gfx9Lib::GetMipChainInfo(
4399     AddrResourceType  resourceType,
4400     AddrSwizzleMode   swizzleMode,
4401     UINT_32           bpp,
4402     UINT_32           mip0Width,
4403     UINT_32           mip0Height,
4404     UINT_32           mip0Depth,
4405     UINT_32           blockWidth,
4406     UINT_32           blockHeight,
4407     UINT_32           blockDepth,
4408     UINT_32           numMipLevel,
4409     ADDR2_MIP_INFO*   pMipInfo) const
4410 {
4411     const Dim3d tailMaxDim =
4412         GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4413 
4414     UINT_32 mipPitch         = mip0Width;
4415     UINT_32 mipHeight        = mip0Height;
4416     UINT_32 mipDepth         = IsTex3d(resourceType) ? mip0Depth : 1;
4417     UINT_32 offset           = 0;
4418     UINT_32 firstMipIdInTail = numMipLevel;
4419     BOOL_32 inTail           = FALSE;
4420     BOOL_32 finalDim         = FALSE;
4421     BOOL_32 is3dThick        = IsThick(resourceType, swizzleMode);
4422     BOOL_32 is3dThin         = IsTex3d(resourceType) && (is3dThick == FALSE);
4423 
4424     for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++)
4425     {
4426         if (inTail)
4427         {
4428             if (finalDim == FALSE)
4429             {
4430                 UINT_32 mipSize;
4431 
4432                 if (is3dThick)
4433                 {
4434                     mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3);
4435                 }
4436                 else
4437                 {
4438                     mipSize = mipPitch * mipHeight * (bpp >> 3);
4439                 }
4440 
4441                 if (mipSize <= 256)
4442                 {
4443                     UINT_32 index = Log2(bpp >> 3);
4444 
4445                     if (is3dThick)
4446                     {
4447                         mipPitch  = Block256_3dZ[index].w;
4448                         mipHeight = Block256_3dZ[index].h;
4449                         mipDepth  = Block256_3dZ[index].d;
4450                     }
4451                     else
4452                     {
4453                         mipPitch  = Block256_2d[index].w;
4454                         mipHeight = Block256_2d[index].h;
4455                     }
4456 
4457                     finalDim = TRUE;
4458                 }
4459             }
4460         }
4461         else
4462         {
4463             inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim,
4464                                  mipPitch, mipHeight, mipDepth);
4465 
4466             if (inTail)
4467             {
4468                 firstMipIdInTail = mipId;
4469                 mipPitch         = tailMaxDim.w;
4470                 mipHeight        = tailMaxDim.h;
4471 
4472                 if (is3dThick)
4473                 {
4474                     mipDepth = tailMaxDim.d;
4475                 }
4476             }
4477             else
4478             {
4479                 mipPitch  = PowTwoAlign(mipPitch,  blockWidth);
4480                 mipHeight = PowTwoAlign(mipHeight, blockHeight);
4481 
4482                 if (is3dThick)
4483                 {
4484                     mipDepth = PowTwoAlign(mipDepth,  blockDepth);
4485                 }
4486             }
4487         }
4488 
4489         if (pMipInfo != NULL)
4490         {
4491             pMipInfo[mipId].pitch  = mipPitch;
4492             pMipInfo[mipId].height = mipHeight;
4493             pMipInfo[mipId].depth  = mipDepth;
4494             pMipInfo[mipId].offset = offset;
4495         }
4496 
4497         offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3));
4498 
4499         if (finalDim)
4500         {
4501             if (is3dThin)
4502             {
4503                 mipDepth = Max(mipDepth >> 1, 1u);
4504             }
4505         }
4506         else
4507         {
4508             mipPitch  = Max(mipPitch >> 1, 1u);
4509             mipHeight = Max(mipHeight >> 1, 1u);
4510 
4511             if (is3dThick || is3dThin)
4512             {
4513                 mipDepth = Max(mipDepth >> 1, 1u);
4514             }
4515         }
4516     }
4517 
4518     return firstMipIdInTail;
4519 }
4520 
4521 /**
4522 ************************************************************************************************************************
4523 *   Gfx9Lib::GetMetaMiptailInfo
4524 *
4525 *   @brief
4526 *       Get mip tail coordinate information.
4527 *
4528 *   @return
4529 *       N/A
4530 ************************************************************************************************************************
4531 */
GetMetaMiptailInfo(ADDR2_META_MIP_INFO * pInfo,Dim3d mipCoord,UINT_32 numMipInTail,Dim3d * pMetaBlkDim) const4532 VOID Gfx9Lib::GetMetaMiptailInfo(
4533     ADDR2_META_MIP_INFO*    pInfo,          ///< [out] output structure to store per mip coord
4534     Dim3d                   mipCoord,       ///< [in] mip tail base coord
4535     UINT_32                 numMipInTail,   ///< [in] number of mips in tail
4536     Dim3d*                  pMetaBlkDim     ///< [in] meta block width/height/depth
4537     ) const
4538 {
4539     BOOL_32 isThick   = (pMetaBlkDim->d > 1);
4540     UINT_32 mipWidth  = pMetaBlkDim->w;
4541     UINT_32 mipHeight = pMetaBlkDim->h >> 1;
4542     UINT_32 mipDepth  = pMetaBlkDim->d;
4543     UINT_32 minInc;
4544 
4545     if (isThick)
4546     {
4547         minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32);
4548     }
4549     else if (pMetaBlkDim->h >= 1024)
4550     {
4551         minInc = 256;
4552     }
4553     else if (pMetaBlkDim->h == 512)
4554     {
4555         minInc = 128;
4556     }
4557     else
4558     {
4559         minInc = 64;
4560     }
4561 
4562     UINT_32 blk32MipId = 0xFFFFFFFF;
4563 
4564     for (UINT_32 mip = 0; mip < numMipInTail; mip++)
4565     {
4566         pInfo[mip].inMiptail = TRUE;
4567         pInfo[mip].startX = mipCoord.w;
4568         pInfo[mip].startY = mipCoord.h;
4569         pInfo[mip].startZ = mipCoord.d;
4570         pInfo[mip].width = mipWidth;
4571         pInfo[mip].height = mipHeight;
4572         pInfo[mip].depth = mipDepth;
4573 
4574         if (mipWidth <= 32)
4575         {
4576             if (blk32MipId == 0xFFFFFFFF)
4577             {
4578                 blk32MipId = mip;
4579             }
4580 
4581             mipCoord.w = pInfo[blk32MipId].startX;
4582             mipCoord.h = pInfo[blk32MipId].startY;
4583             mipCoord.d = pInfo[blk32MipId].startZ;
4584 
4585             switch (mip - blk32MipId)
4586             {
4587                 case 0:
4588                     mipCoord.w += 32;       // 16x16
4589                     break;
4590                 case 1:
4591                     mipCoord.h += 32;       // 8x8
4592                     break;
4593                 case 2:
4594                     mipCoord.h += 32;       // 4x4
4595                     mipCoord.w += 16;
4596                     break;
4597                 case 3:
4598                     mipCoord.h += 32;       // 2x2
4599                     mipCoord.w += 32;
4600                     break;
4601                 case 4:
4602                     mipCoord.h += 32;       // 1x1
4603                     mipCoord.w += 48;
4604                     break;
4605                 // The following are for BC/ASTC formats
4606                 case 5:
4607                     mipCoord.h += 48;       // 1/2 x 1/2
4608                     break;
4609                 case 6:
4610                     mipCoord.h += 48;       // 1/4 x 1/4
4611                     mipCoord.w += 16;
4612                     break;
4613                 case 7:
4614                     mipCoord.h += 48;       // 1/8 x 1/8
4615                     mipCoord.w += 32;
4616                     break;
4617                 case 8:
4618                     mipCoord.h += 48;       // 1/16 x 1/16
4619                     mipCoord.w += 48;
4620                     break;
4621                 default:
4622                     ADDR_ASSERT_ALWAYS();
4623                     break;
4624             }
4625 
4626             mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8;
4627             mipHeight = mipWidth;
4628 
4629             if (isThick)
4630             {
4631                 mipDepth = mipWidth;
4632             }
4633         }
4634         else
4635         {
4636             if (mipWidth <= minInc)
4637             {
4638                 // if we're below the minimal increment...
4639                 if (isThick)
4640                 {
4641                     // For 3d, just go in z direction
4642                     mipCoord.d += mipDepth;
4643                 }
4644                 else
4645                 {
4646                     // For 2d, first go across, then down
4647                     if ((mipWidth * 2) == minInc)
4648                     {
4649                         // if we're 2 mips below, that's when we go back in x, and down in y
4650                         mipCoord.w -= minInc;
4651                         mipCoord.h += minInc;
4652                     }
4653                     else
4654                     {
4655                         // otherwise, just go across in x
4656                         mipCoord.w += minInc;
4657                     }
4658                 }
4659             }
4660             else
4661             {
4662                 // On even mip, go down, otherwise, go across
4663                 if (mip & 1)
4664                 {
4665                     mipCoord.w += mipWidth;
4666                 }
4667                 else
4668                 {
4669                     mipCoord.h += mipHeight;
4670                 }
4671             }
4672             // Divide the width by 2
4673             mipWidth >>= 1;
4674             // After the first mip in tail, the mip is always a square
4675             mipHeight = mipWidth;
4676             // ...or for 3d, a cube
4677             if (isThick)
4678             {
4679                 mipDepth = mipWidth;
4680             }
4681         }
4682     }
4683 }
4684 
4685 /**
4686 ************************************************************************************************************************
4687 *   Gfx9Lib::GetMipStartPos
4688 *
4689 *   @brief
4690 *       Internal function to get out information about mip logical start position
4691 *
4692 *   @return
4693 *       logical start position in macro block width/height/depth of one mip level within one slice
4694 ************************************************************************************************************************
4695 */
GetMipStartPos(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 width,UINT_32 height,UINT_32 depth,UINT_32 blockWidth,UINT_32 blockHeight,UINT_32 blockDepth,UINT_32 mipId,UINT_32 log2ElementBytes,UINT_32 * pMipTailBytesOffset) const4696 Dim3d Gfx9Lib::GetMipStartPos(
4697     AddrResourceType  resourceType,
4698     AddrSwizzleMode   swizzleMode,
4699     UINT_32           width,
4700     UINT_32           height,
4701     UINT_32           depth,
4702     UINT_32           blockWidth,
4703     UINT_32           blockHeight,
4704     UINT_32           blockDepth,
4705     UINT_32           mipId,
4706     UINT_32           log2ElementBytes,
4707     UINT_32*          pMipTailBytesOffset) const
4708 {
4709     Dim3d       mipStartPos = {0};
4710     const Dim3d tailMaxDim  = GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth);
4711 
4712     // Report mip in tail if Mip0 is already in mip tail
4713     BOOL_32 inMipTail      = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth);
4714     UINT_32 log2BlkSize    = GetBlockSizeLog2(swizzleMode);
4715     UINT_32 mipIndexInTail = mipId;
4716 
4717     if (inMipTail == FALSE)
4718     {
4719         // Mip 0 dimension, unit in block
4720         UINT_32 mipWidthInBlk   = width  / blockWidth;
4721         UINT_32 mipHeightInBlk  = height / blockHeight;
4722         UINT_32 mipDepthInBlk   = depth  / blockDepth;
4723         AddrMajorMode majorMode = GetMajorMode(resourceType,
4724                                                swizzleMode,
4725                                                mipWidthInBlk,
4726                                                mipHeightInBlk,
4727                                                mipDepthInBlk);
4728 
4729         UINT_32 endingMip = mipId + 1;
4730 
4731         for (UINT_32 i = 1; i <= mipId; i++)
4732         {
4733             if ((i == 1) || (i == 3))
4734             {
4735                 if (majorMode == ADDR_MAJOR_Y)
4736                 {
4737                     mipStartPos.w += mipWidthInBlk;
4738                 }
4739                 else
4740                 {
4741                     mipStartPos.h += mipHeightInBlk;
4742                 }
4743             }
4744             else
4745             {
4746                 if (majorMode == ADDR_MAJOR_X)
4747                 {
4748                    mipStartPos.w += mipWidthInBlk;
4749                 }
4750                 else if (majorMode == ADDR_MAJOR_Y)
4751                 {
4752                    mipStartPos.h += mipHeightInBlk;
4753                 }
4754                 else
4755                 {
4756                    mipStartPos.d += mipDepthInBlk;
4757                 }
4758             }
4759 
4760             BOOL_32 inTail = FALSE;
4761 
4762             if (IsThick(resourceType, swizzleMode))
4763             {
4764                 UINT_32 dim = log2BlkSize % 3;
4765 
4766                 if (dim == 0)
4767                 {
4768                     inTail =
4769                         (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2);
4770                 }
4771                 else if (dim == 1)
4772                 {
4773                     inTail =
4774                         (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2);
4775                 }
4776                 else
4777                 {
4778                     inTail =
4779                         (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1);
4780                 }
4781             }
4782             else
4783             {
4784                 if (log2BlkSize & 1)
4785                 {
4786                     inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1);
4787                 }
4788                 else
4789                 {
4790                     inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2);
4791                 }
4792             }
4793 
4794             if (inTail)
4795             {
4796                 endingMip = i;
4797                 break;
4798             }
4799 
4800             mipWidthInBlk  = RoundHalf(mipWidthInBlk);
4801             mipHeightInBlk = RoundHalf(mipHeightInBlk);
4802             mipDepthInBlk  = RoundHalf(mipDepthInBlk);
4803         }
4804 
4805         if (mipId >= endingMip)
4806         {
4807             inMipTail      = TRUE;
4808             mipIndexInTail = mipId - endingMip;
4809         }
4810     }
4811 
4812     if (inMipTail)
4813     {
4814         UINT_32 index = mipIndexInTail + MaxMacroBits - log2BlkSize;
4815         ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32));
4816         *pMipTailBytesOffset = MipTailOffset256B[index] << 8;
4817     }
4818 
4819     return mipStartPos;
4820 }
4821 
4822 /**
4823 ************************************************************************************************************************
4824 *   Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled
4825 *
4826 *   @brief
4827 *       Internal function to calculate address from coord for tiled swizzle surface
4828 *
4829 *   @return
4830 *       ADDR_E_RETURNCODE
4831 ************************************************************************************************************************
4832 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4833 ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled(
4834      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4835      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4836      ) const
4837 {
4838     ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0};
4839     localIn.swizzleMode  = pIn->swizzleMode;
4840     localIn.flags        = pIn->flags;
4841     localIn.resourceType = pIn->resourceType;
4842     localIn.bpp          = pIn->bpp;
4843     localIn.width        = Max(pIn->unalignedWidth, 1u);
4844     localIn.height       = Max(pIn->unalignedHeight, 1u);
4845     localIn.numSlices    = Max(pIn->numSlices, 1u);
4846     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4847     localIn.numSamples   = Max(pIn->numSamples, 1u);
4848     localIn.numFrags     = Max(pIn->numFrags, 1u);
4849     if (localIn.numMipLevels <= 1)
4850     {
4851         localIn.pitchInElement = pIn->pitchInElement;
4852     }
4853 
4854     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
4855     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut);
4856 
4857     BOOL_32 valid = (returnCode == ADDR_OK) &&
4858                     (IsThin(pIn->resourceType, pIn->swizzleMode) ||
4859                      IsThick(pIn->resourceType, pIn->swizzleMode)) &&
4860                     ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode)));
4861 
4862     if (valid)
4863     {
4864         UINT_32 log2ElementBytes   = Log2(pIn->bpp >> 3);
4865         Dim3d   mipStartPos        = {0};
4866         UINT_32 mipTailBytesOffset = 0;
4867 
4868         if (pIn->numMipLevels > 1)
4869         {
4870             // Mip-map chain cannot be MSAA surface
4871             ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1));
4872 
4873             mipStartPos = GetMipStartPos(pIn->resourceType,
4874                                          pIn->swizzleMode,
4875                                          localOut.pitch,
4876                                          localOut.height,
4877                                          localOut.numSlices,
4878                                          localOut.blockWidth,
4879                                          localOut.blockHeight,
4880                                          localOut.blockSlices,
4881                                          pIn->mipId,
4882                                          log2ElementBytes,
4883                                          &mipTailBytesOffset);
4884         }
4885 
4886         UINT_32 interleaveOffset = 0;
4887         UINT_32 pipeBits = 0;
4888         UINT_32 pipeXor = 0;
4889         UINT_32 bankBits = 0;
4890         UINT_32 bankXor = 0;
4891 
4892         if (IsThin(pIn->resourceType, pIn->swizzleMode))
4893         {
4894             UINT_32 blockOffset = 0;
4895             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
4896 
4897             if (IsZOrderSwizzle(pIn->swizzleMode))
4898             {
4899                 // Morton generation
4900                 if ((log2ElementBytes == 0) || (log2ElementBytes == 2))
4901                 {
4902                     UINT_32 totalLowBits = 6 - log2ElementBytes;
4903                     UINT_32 mortBits = totalLowBits / 2;
4904                     UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits);
4905                     // Are 9 bits enough?
4906                     UINT_32 highBitsValue =
4907                         MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits;
4908                     blockOffset = lowBitsValue | highBitsValue;
4909                     ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue);
4910                 }
4911                 else
4912                 {
4913                     blockOffset = MortonGen2d(pIn->y, pIn->x, 13);
4914                 }
4915 
4916                 // Fill LSBs with sample bits
4917                 if (pIn->numSamples > 1)
4918                 {
4919                     blockOffset *= pIn->numSamples;
4920                     blockOffset |= pIn->sample;
4921                 }
4922 
4923                 // Shift according to BytesPP
4924                 blockOffset <<= log2ElementBytes;
4925             }
4926             else
4927             {
4928                 // Micro block offset
4929                 UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn);
4930                 blockOffset = microBlockOffset;
4931 
4932                 // Micro block dimension
4933                 ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp);
4934                 Dim2d microBlockDim = Block256_2d[log2ElementBytes];
4935                 // Morton generation, does 12 bit enough?
4936                 blockOffset |=
4937                     MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8;
4938 
4939                 // Sample bits start location
4940                 UINT_32 sampleStart = log2BlkSize - Log2(pIn->numSamples);
4941                 // Join sample bits information to the highest Macro block bits
4942                 if (IsNonPrtXor(pIn->swizzleMode))
4943                 {
4944                     // Non-prt-Xor : xor highest Macro block bits with sample bits
4945                     blockOffset = blockOffset ^ (pIn->sample << sampleStart);
4946                 }
4947                 else
4948                 {
4949                     // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits
4950                     // after this op, the blockOffset only contains log2 Macro block size bits
4951                     blockOffset %= (1 << sampleStart);
4952                     blockOffset |= (pIn->sample << sampleStart);
4953                     ADDR_ASSERT((blockOffset >> log2BlkSize) == 0);
4954                 }
4955             }
4956 
4957             if (IsXor(pIn->swizzleMode))
4958             {
4959                 // Mask off bits above Macro block bits to keep page synonyms working for prt
4960                 if (IsPrt(pIn->swizzleMode))
4961                 {
4962                     blockOffset &= ((1 << log2BlkSize) - 1);
4963                 }
4964 
4965                 // Preserve offset inside pipe interleave
4966                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
4967                 blockOffset >>= m_pipeInterleaveLog2;
4968 
4969                 // Pipe/Se xor bits
4970                 pipeBits = GetPipeXorBits(log2BlkSize);
4971                 // Pipe xor
4972                 pipeXor = FoldXor2d(blockOffset, pipeBits);
4973                 blockOffset >>= pipeBits;
4974 
4975                 // Bank xor bits
4976                 bankBits = GetBankXorBits(log2BlkSize);
4977                 // Bank Xor
4978                 bankXor = FoldXor2d(blockOffset, bankBits);
4979                 blockOffset >>= bankBits;
4980 
4981                 // Put all the part back together
4982                 blockOffset <<= bankBits;
4983                 blockOffset |= bankXor;
4984                 blockOffset <<= pipeBits;
4985                 blockOffset |= pipeXor;
4986                 blockOffset <<= m_pipeInterleaveLog2;
4987                 blockOffset |= interleaveOffset;
4988             }
4989 
4990             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
4991             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
4992 
4993             blockOffset |= mipTailBytesOffset;
4994 
4995             if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1))
4996             {
4997                 // Apply slice xor if not MSAA/PRT
4998                 blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << m_pipeInterleaveLog2);
4999                 blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) <<
5000                                 (m_pipeInterleaveLog2 + pipeBits));
5001             }
5002 
5003             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5004                                                   bankBits, pipeBits, &blockOffset);
5005 
5006             blockOffset %= (1 << log2BlkSize);
5007 
5008             UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth;
5009             UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight;
5010             UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock;
5011             UINT_64 macroBlockIndex =
5012                 (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock +
5013                 ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock +
5014                 ((pIn->x / localOut.blockWidth) + mipStartPos.w);
5015 
5016             pOut->addr = blockOffset | (macroBlockIndex << log2BlkSize);
5017         }
5018         else
5019         {
5020             UINT_32 log2BlkSize = GetBlockSizeLog2(pIn->swizzleMode);
5021 
5022             Dim3d microBlockDim = Block1K_3d[log2ElementBytes];
5023 
5024             UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w),
5025                                               (pIn->y / microBlockDim.h),
5026                                               (pIn->slice / microBlockDim.d),
5027                                               8);
5028 
5029             blockOffset <<= 10;
5030             blockOffset |= ComputeSurface3DMicroBlockOffset(pIn);
5031 
5032             if (IsXor(pIn->swizzleMode))
5033             {
5034                 // Mask off bits above Macro block bits to keep page synonyms working for prt
5035                 if (IsPrt(pIn->swizzleMode))
5036                 {
5037                     blockOffset &= ((1 << log2BlkSize) - 1);
5038                 }
5039 
5040                 // Preserve offset inside pipe interleave
5041                 interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1);
5042                 blockOffset >>= m_pipeInterleaveLog2;
5043 
5044                 // Pipe/Se xor bits
5045                 pipeBits = GetPipeXorBits(log2BlkSize);
5046                 // Pipe xor
5047                 pipeXor = FoldXor3d(blockOffset, pipeBits);
5048                 blockOffset >>= pipeBits;
5049 
5050                 // Bank xor bits
5051                 bankBits = GetBankXorBits(log2BlkSize);
5052                 // Bank Xor
5053                 bankXor = FoldXor3d(blockOffset, bankBits);
5054                 blockOffset >>= bankBits;
5055 
5056                 // Put all the part back together
5057                 blockOffset <<= bankBits;
5058                 blockOffset |= bankXor;
5059                 blockOffset <<= pipeBits;
5060                 blockOffset |= pipeXor;
5061                 blockOffset <<= m_pipeInterleaveLog2;
5062                 blockOffset |= interleaveOffset;
5063             }
5064 
5065             ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset));
5066             ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2BlkSize)));
5067             blockOffset |= mipTailBytesOffset;
5068 
5069             returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor,
5070                                                   bankBits, pipeBits, &blockOffset);
5071 
5072             blockOffset %= (1 << log2BlkSize);
5073 
5074             UINT_32 xb = pIn->x / localOut.blockWidth  + mipStartPos.w;
5075             UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h;
5076             UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d;
5077 
5078             UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth;
5079             UINT_32 sliceSizeInBlock =
5080                 (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock;
5081             UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
5082 
5083             pOut->addr = blockOffset | (blockIndex << log2BlkSize);
5084         }
5085     }
5086     else
5087     {
5088         returnCode = ADDR_INVALIDPARAMS;
5089     }
5090 
5091     return returnCode;
5092 }
5093 
5094 /**
5095 ************************************************************************************************************************
5096 *   Gfx9Lib::ComputeSurfaceLinearPadding
5097 *
5098 *   @brief
5099 *       Internal function to calculate padding for linear swizzle 2D/3D surface
5100 *
5101 *   @return
5102 *       ADDR_E_RETURNCODE
5103 ************************************************************************************************************************
5104 */
ComputeSurfaceLinearPadding(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pMipmap0PaddedWidth,UINT_32 * pSlice0PaddedHeight,ADDR2_MIP_INFO * pMipInfo) const5105 ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding(
5106     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,                    ///< [in] input srtucture
5107     UINT_32*                                pMipmap0PaddedWidth,    ///< [out] padded width in element
5108     UINT_32*                                pSlice0PaddedHeight,    ///< [out] padded height for HW
5109     ADDR2_MIP_INFO*                         pMipInfo                ///< [out] per mip information
5110     ) const
5111 {
5112     ADDR_E_RETURNCODE returnCode = ADDR_OK;
5113 
5114     UINT_32 elementBytes        = pIn->bpp >> 3;
5115     UINT_32 pitchAlignInElement = 0;
5116 
5117     if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL)
5118     {
5119         ADDR_ASSERT(pIn->numMipLevels <= 1);
5120         ADDR_ASSERT(pIn->numSlices <= 1);
5121         pitchAlignInElement = 1;
5122     }
5123     else
5124     {
5125         pitchAlignInElement = (256 / elementBytes);
5126     }
5127 
5128     UINT_32 mipChainWidth      = PowTwoAlign(pIn->width, pitchAlignInElement);
5129     UINT_32 slice0PaddedHeight = pIn->height;
5130 
5131     returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement,
5132                                             &mipChainWidth, &slice0PaddedHeight);
5133 
5134     if (returnCode == ADDR_OK)
5135     {
5136         UINT_32 mipChainHeight = 0;
5137         UINT_32 mipHeight      = pIn->height;
5138         UINT_32 mipDepth       = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
5139 
5140         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
5141         {
5142             if (pMipInfo != NULL)
5143             {
5144                 pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes;
5145                 pMipInfo[i].pitch  = mipChainWidth;
5146                 pMipInfo[i].height = mipHeight;
5147                 pMipInfo[i].depth  = mipDepth;
5148             }
5149 
5150             mipChainHeight += mipHeight;
5151             mipHeight = RoundHalf(mipHeight);
5152             mipHeight = Max(mipHeight, 1u);
5153         }
5154 
5155         *pMipmap0PaddedWidth = mipChainWidth;
5156         *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight;
5157     }
5158 
5159     return returnCode;
5160 }
5161 
5162 /**
5163 ************************************************************************************************************************
5164 *   Gfx9Lib::ComputeThinBlockDimension
5165 *
5166 *   @brief
5167 *       Internal function to get thin block width/height/depth in element from surface input params.
5168 *
5169 *   @return
5170 *       N/A
5171 ************************************************************************************************************************
5172 */
ComputeThinBlockDimension(UINT_32 * pWidth,UINT_32 * pHeight,UINT_32 * pDepth,UINT_32 bpp,UINT_32 numSamples,AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const5173 VOID Gfx9Lib::ComputeThinBlockDimension(
5174     UINT_32*         pWidth,
5175     UINT_32*         pHeight,
5176     UINT_32*         pDepth,
5177     UINT_32          bpp,
5178     UINT_32          numSamples,
5179     AddrResourceType resourceType,
5180     AddrSwizzleMode  swizzleMode) const
5181 {
5182     ADDR_ASSERT(IsThin(resourceType, swizzleMode));
5183 
5184     const UINT_32 log2BlkSize              = GetBlockSizeLog2(swizzleMode);
5185     const UINT_32 eleBytes                 = bpp >> 3;
5186     const UINT_32 microBlockSizeTableIndex = Log2(eleBytes);
5187     const UINT_32 log2blkSizeIn256B        = log2BlkSize - 8;
5188     const UINT_32 widthAmp                 = log2blkSizeIn256B / 2;
5189     const UINT_32 heightAmp                = log2blkSizeIn256B - widthAmp;
5190 
5191     ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / sizeof(Block256_2d[0]));
5192 
5193     *pWidth  = (Block256_2d[microBlockSizeTableIndex].w << widthAmp);
5194     *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp);
5195     *pDepth  = 1;
5196 
5197     if (numSamples > 1)
5198     {
5199         const UINT_32 log2sample = Log2(numSamples);
5200         const UINT_32 q          = log2sample >> 1;
5201         const UINT_32 r          = log2sample & 1;
5202 
5203         if (log2BlkSize & 1)
5204         {
5205             *pWidth  >>= q;
5206             *pHeight >>= (q + r);
5207         }
5208         else
5209         {
5210             *pWidth  >>= (q + r);
5211             *pHeight >>= q;
5212         }
5213     }
5214 }
5215 
5216 } // V2
5217 } // Addr
5218