xref: /aosp_15_r20/external/mesa3d/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2007-2022 Advanced Micro Devices, Inc.  All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx10addrlib.cpp
12 * @brief Contains the implementation for the Gfx10Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx10addrlib.h"
17 #include "addrcommon.h"
18 #include "gfx10_gb_reg.h"
19 
20 #include "amdgpu_asic_addr.h"
21 
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
24 
25 namespace Addr
26 {
27 /**
28 ************************************************************************************************************************
29 *   Gfx10HwlInit
30 *
31 *   @brief
32 *       Creates a Gfx10Lib object.
33 *
34 *   @return
35 *       Returns a Gfx10Lib object pointer.
36 ************************************************************************************************************************
37 */
Gfx10HwlInit(const Client * pClient)38 Addr::Lib* Gfx10HwlInit(const Client* pClient)
39 {
40     return V2::Gfx10Lib::CreateObj(pClient);
41 }
42 
43 namespace V2
44 {
45 
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 //                               Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 
50 const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
51 {//Linear 256B  4KB  64KB   Var    Z    Std   Disp  Rot   XOR    T     RtOpt Reserved
52     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR
53     {{0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_S
54     {{0,    1,    0,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_256B_D
55     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
56 
57     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
58     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_S
59     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_4KB_D
60     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
61 
62     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
63     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_S
64     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    0,    0,    0,    0}}, // ADDR_SW_64KB_D
65     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
66 
67     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
68     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
69     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
70     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
71 
72     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
73     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_S_T
74     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    1,    0,    0}}, // ADDR_SW_64KB_D_T
75     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
76 
77     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
78     {{0,    0,    1,    0,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_S_X
79     {{0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_4KB_D_X
80     {{0,    0,    1,    0,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_4KB_R_X
81 
82     {{0,    0,    0,    1,    0,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_Z_X
83     {{0,    0,    0,    1,    0,    0,    1,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_S_X
84     {{0,    0,    0,    1,    0,    0,    0,    1,    0,    1,    0,    0,    0}}, // ADDR_SW_64KB_D_X
85     {{0,    0,    0,    1,    0,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_64KB_R_X
86 
87     {{0,    0,    0,    0,    1,    1,    0,    0,    0,    1,    0,    0,    0}}, // ADDR_SW_VAR_Z_X
88     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
89     {{0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // Reserved
90     {{0,    0,    0,    0,    1,    0,    0,    0,    0,    1,    0,    1,    0}}, // ADDR_SW_VAR_R_X
91     {{1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0}}, // ADDR_SW_LINEAR_GENERAL
92 };
93 
94 const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};
95 
96 const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};
97 const Dim3d Gfx10Lib::Block4K_Log2_3d[]  = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};
98 
99 /**
100 ************************************************************************************************************************
101 *   Gfx10Lib::Gfx10Lib
102 *
103 *   @brief
104 *       Constructor
105 *
106 ************************************************************************************************************************
107 */
Gfx10Lib(const Client * pClient)108 Gfx10Lib::Gfx10Lib(const Client* pClient)
109     :
110     Lib(pClient),
111     m_numPkrLog2(0),
112     m_numSaLog2(0),
113     m_colorBaseIndex(0),
114     m_xmaskBaseIndex(0),
115     m_htileBaseIndex(0),
116     m_dccBaseIndex(0)
117 {
118     memset(&m_settings, 0, sizeof(m_settings));
119     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
120 }
121 
122 /**
123 ************************************************************************************************************************
124 *   Gfx10Lib::~Gfx10Lib
125 *
126 *   @brief
127 *       Destructor
128 ************************************************************************************************************************
129 */
~Gfx10Lib()130 Gfx10Lib::~Gfx10Lib()
131 {
132 }
133 
134 /**
135 ************************************************************************************************************************
136 *   Gfx10Lib::HwlComputeHtileInfo
137 *
138 *   @brief
139 *       Interface function stub of AddrComputeHtileInfo
140 *
141 *   @return
142 *       ADDR_E_RETURNCODE
143 ************************************************************************************************************************
144 */
HwlComputeHtileInfo(const ADDR2_COMPUTE_HTILE_INFO_INPUT * pIn,ADDR2_COMPUTE_HTILE_INFO_OUTPUT * pOut) const145 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
146     const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,    ///< [in] input structure
147     ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut    ///< [out] output structure
148     ) const
149 {
150     ADDR_E_RETURNCODE ret = ADDR_OK;
151 
152     if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
153          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||
154         (pIn->hTileFlags.pipeAligned != TRUE))
155     {
156         ret = ADDR_INVALIDPARAMS;
157     }
158     else
159     {
160         Dim3d         metaBlk     = {};
161         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,
162                                                    ADDR_RSRC_TEX_2D,
163                                                    pIn->swizzleMode,
164                                                    0,
165                                                    0,
166                                                    TRUE,
167                                                    &metaBlk);
168 
169         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
170         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
171         pOut->baseAlign     = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));
172         pOut->metaBlkWidth  = metaBlk.w;
173         pOut->metaBlkHeight = metaBlk.h;
174 
175         if (pIn->numMipLevels > 1)
176         {
177             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
178 
179             UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
180 
181             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)
182             {
183                 UINT_32 mipWidth, mipHeight;
184 
185                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
186 
187                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
188                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
189 
190                 const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
191                 const UINT_32 heightInM    = mipHeight / metaBlk.h;
192                 const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
193 
194                 if (pOut->pMipInfo != NULL)
195                 {
196                     pOut->pMipInfo[i].inMiptail = FALSE;
197                     pOut->pMipInfo[i].offset    = offset;
198                     pOut->pMipInfo[i].sliceSize = mipSliceSize;
199                 }
200 
201                 offset += mipSliceSize;
202             }
203 
204             pOut->sliceSize          = offset;
205             pOut->metaBlkNumPerSlice = offset / metaBlkSize;
206             pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;
207 
208             if (pOut->pMipInfo != NULL)
209             {
210                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
211                 {
212                     pOut->pMipInfo[i].inMiptail = TRUE;
213                     pOut->pMipInfo[i].offset    = 0;
214                     pOut->pMipInfo[i].sliceSize = 0;
215                 }
216 
217                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
218                 {
219                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
220                 }
221             }
222         }
223         else
224         {
225             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
226             const UINT_32 heightInM = pOut->height / metaBlk.h;
227 
228             pOut->metaBlkNumPerSlice    = pitchInM * heightInM;
229             pOut->sliceSize             = pOut->metaBlkNumPerSlice * metaBlkSize;
230             pOut->htileBytes            = pOut->sliceSize * pIn->numSlices;
231 
232             if (pOut->pMipInfo != NULL)
233             {
234                 pOut->pMipInfo[0].inMiptail = FALSE;
235                 pOut->pMipInfo[0].offset    = 0;
236                 pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
237             }
238         }
239 
240         // Get the HTILE address equation (copied from HtileAddrFromCoord).
241         // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
242         const UINT_32 index = m_xmaskBaseIndex;
243         const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
244 
245         ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);
246         pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];
247     }
248 
249     return ret;
250 }
251 
252 /**
253 ************************************************************************************************************************
254 *   Gfx10Lib::HwlComputeCmaskInfo
255 *
256 *   @brief
257 *       Interface function stub of AddrComputeCmaskInfo
258 *
259 *   @return
260 *       ADDR_E_RETURNCODE
261 ************************************************************************************************************************
262 */
HwlComputeCmaskInfo(const ADDR2_COMPUTE_CMASK_INFO_INPUT * pIn,ADDR2_COMPUTE_CMASK_INFO_OUTPUT * pOut) const263 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
264     const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,    ///< [in] input structure
265     ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut    ///< [out] output structure
266     ) const
267 {
268     ADDR_E_RETURNCODE ret = ADDR_OK;
269 
270     if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
271         (pIn->cMaskFlags.pipeAligned != TRUE)   ||
272         ((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&
273          ((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))
274     {
275         ret = ADDR_INVALIDPARAMS;
276     }
277     else
278     {
279         Dim3d         metaBlk     = {};
280         const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,
281                                                    ADDR_RSRC_TEX_2D,
282                                                    pIn->swizzleMode,
283                                                    0,
284                                                    0,
285                                                    TRUE,
286                                                    &metaBlk);
287 
288         pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlk.w);
289         pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);
290         pOut->baseAlign     = metaBlkSize;
291         pOut->metaBlkWidth  = metaBlk.w;
292         pOut->metaBlkHeight = metaBlk.h;
293 
294         if (pIn->numMipLevels > 1)
295         {
296             ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
297 
298             UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;
299 
300             for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
301             {
302                 UINT_32 mipWidth, mipHeight;
303 
304                 GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
305 
306                 mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
307                 mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
308 
309                 const UINT_32 pitchInM  = mipWidth  / metaBlk.w;
310                 const UINT_32 heightInM = mipHeight / metaBlk.h;
311 
312                 if (pOut->pMipInfo != NULL)
313                 {
314                     pOut->pMipInfo[i].inMiptail = FALSE;
315                     pOut->pMipInfo[i].offset    = metaBlkPerSlice * metaBlkSize;
316                     pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;
317                 }
318 
319                 metaBlkPerSlice += pitchInM * heightInM;
320             }
321 
322             pOut->metaBlkNumPerSlice = metaBlkPerSlice;
323 
324             if (pOut->pMipInfo != NULL)
325             {
326                 for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
327                 {
328                     pOut->pMipInfo[i].inMiptail = TRUE;
329                     pOut->pMipInfo[i].offset    = 0;
330                     pOut->pMipInfo[i].sliceSize = 0;
331                 }
332 
333                 if (pIn->firstMipIdInTail != pIn->numMipLevels)
334                 {
335                     pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
336                 }
337             }
338         }
339         else
340         {
341             const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
342             const UINT_32 heightInM = pOut->height / metaBlk.h;
343 
344             pOut->metaBlkNumPerSlice = pitchInM * heightInM;
345 
346             if (pOut->pMipInfo != NULL)
347             {
348                 pOut->pMipInfo[0].inMiptail = FALSE;
349                 pOut->pMipInfo[0].offset    = 0;
350                 pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;
351             }
352         }
353 
354         pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlkSize;
355         pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;
356 
357         // Get the CMASK address equation (copied from CmaskAddrFromCoord)
358         const UINT_32  fmaskBpp      = GetFmaskBpp(1, 1);
359         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
360         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
361         const UINT_8*  patIdxTable   =
362             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
363             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
364 
365         ADDR_C_ASSERT(sizeof(GFX10_CMASK_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
366         pOut->equation.gfx10_bits = (UINT_16*)GFX10_CMASK_SW_PATTERN[patIdxTable[index]];
367     }
368 
369     return ret;
370 }
371 
372 /**
373 ************************************************************************************************************************
374 *   Gfx10Lib::HwlComputeDccInfo
375 *
376 *   @brief
377 *       Interface function to compute DCC key info
378 *
379 *   @return
380 *       ADDR_E_RETURNCODE
381 ************************************************************************************************************************
382 */
HwlComputeDccInfo(const ADDR2_COMPUTE_DCCINFO_INPUT * pIn,ADDR2_COMPUTE_DCCINFO_OUTPUT * pOut) const383 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
384     const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,    ///< [in] input structure
385     ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut    ///< [out] output structure
386     ) const
387 {
388     ADDR_E_RETURNCODE ret = ADDR_OK;
389 
390     if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
391     {
392         // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
393         // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
394         ret = ADDR_INVALIDPARAMS;
395     }
396     else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
397     {
398         // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
399         ret = ADDR_INVALIDPARAMS;
400     }
401     else
402     {
403         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
404 
405         {
406             // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
407             ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));
408 
409             const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);
410 
411             pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
412             pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
413             pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;
414         }
415 
416         if (ret == ADDR_OK)
417         {
418             Dim3d         metaBlk     = {};
419             const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
420             const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
421                                                        pIn->resourceType,
422                                                        pIn->swizzleMode,
423                                                        elemLog2,
424                                                        numFragLog2,
425                                                        pIn->dccKeyFlags.pipeAligned,
426                                                        &metaBlk);
427 
428             pOut->dccRamBaseAlign   = metaBlkSize;
429             pOut->metaBlkWidth      = metaBlk.w;
430             pOut->metaBlkHeight     = metaBlk.h;
431             pOut->metaBlkDepth      = metaBlk.d;
432             pOut->metaBlkSize       = metaBlkSize;
433 
434             pOut->pitch             = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
435             pOut->height            = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
436             pOut->depth             = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);
437 
438             if (pIn->numMipLevels > 1)
439             {
440                 ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);
441 
442                 UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;
443 
444                 for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
445                 {
446                     UINT_32 mipWidth, mipHeight;
447 
448                     GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);
449 
450                     mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
451                     mipHeight = PowTwoAlign(mipHeight, metaBlk.h);
452 
453                     const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
454                     const UINT_32 heightInM    = mipHeight / metaBlk.h;
455                     const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;
456 
457                     if (pOut->pMipInfo != NULL)
458                     {
459                         pOut->pMipInfo[i].inMiptail = FALSE;
460                         pOut->pMipInfo[i].offset    = offset;
461                         pOut->pMipInfo[i].sliceSize = mipSliceSize;
462                     }
463 
464                     offset += mipSliceSize;
465                 }
466 
467                 pOut->dccRamSliceSize    = offset;
468                 pOut->metaBlkNumPerSlice = offset / metaBlkSize;
469                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
470 
471                 if (pOut->pMipInfo != NULL)
472                 {
473                     for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
474                     {
475                         pOut->pMipInfo[i].inMiptail = TRUE;
476                         pOut->pMipInfo[i].offset    = 0;
477                         pOut->pMipInfo[i].sliceSize = 0;
478                     }
479 
480                     if (pIn->firstMipIdInTail != pIn->numMipLevels)
481                     {
482                         pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
483                     }
484                 }
485             }
486             else
487             {
488                 const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
489                 const UINT_32 heightInM = pOut->height / metaBlk.h;
490 
491                 pOut->metaBlkNumPerSlice = pitchInM * heightInM;
492                 pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
493                 pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth  / metaBlk.d);
494 
495                 if (pOut->pMipInfo != NULL)
496                 {
497                     pOut->pMipInfo[0].inMiptail = FALSE;
498                     pOut->pMipInfo[0].offset    = 0;
499                     pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
500                 }
501             }
502 
503             // Get the DCC address equation (copied from DccAddrFromCoord)
504             const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
505             const UINT_32 numPipeLog2 = m_pipesLog2;
506             UINT_32       index       = m_dccBaseIndex + elemLog2;
507             const UINT_8* patIdxTable;
508 
509             if (m_settings.supportRbPlus)
510             {
511                 patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
512 
513                 if (pIn->dccKeyFlags.pipeAligned)
514                 {
515                     index += MaxNumOfBpp;
516 
517                     if (m_numPkrLog2 < 2)
518                     {
519                         index += m_pipesLog2 * MaxNumOfBpp;
520                     }
521                     else
522                     {
523                         // 4 groups for "m_numPkrLog2 < 2" case
524                         index += 4 * MaxNumOfBpp;
525 
526                         const UINT_32 dccPipePerPkr = 3;
527 
528                         index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
529                                  (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
530                     }
531                 }
532             }
533             else
534             {
535                 patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
536 
537                 if (pIn->dccKeyFlags.pipeAligned)
538                 {
539                     index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
540                 }
541                 else
542                 {
543                     index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
544                 }
545             }
546 
547             ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
548             pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
549         }
550     }
551 
552     return ret;
553 }
554 
555 /**
556 ************************************************************************************************************************
557 *   Gfx10Lib::HwlComputeCmaskAddrFromCoord
558 *
559 *   @brief
560 *       Interface function stub of AddrComputeCmaskAddrFromCoord
561 *
562 *   @return
563 *       ADDR_E_RETURNCODE
564 ************************************************************************************************************************
565 */
HwlComputeCmaskAddrFromCoord(const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT * pOut)566 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
567     const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
568     ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
569 {
570     // Only support pipe aligned CMask
571     ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);
572 
573     ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
574     input.size            = sizeof(input);
575     input.cMaskFlags      = pIn->cMaskFlags;
576     input.colorFlags      = pIn->colorFlags;
577     input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
578     input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
579     input.numSlices       = Max(pIn->numSlices,       1u);
580     input.swizzleMode     = pIn->swizzleMode;
581     input.resourceType    = pIn->resourceType;
582 
583     ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
584     output.size = sizeof(output);
585 
586     ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);
587 
588     if (returnCode == ADDR_OK)
589     {
590         const UINT_32  fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
591         const UINT_32  fmaskElemLog2 = Log2(fmaskBpp >> 3);
592         const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
593         const UINT_32  index         = m_xmaskBaseIndex + fmaskElemLog2;
594         const UINT_8*  patIdxTable   =
595             (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
596             (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);
597 
598         const UINT_32  blkSizeLog2  = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
599         const UINT_32  blkMask      = (1 << blkSizeLog2) - 1;
600         const UINT_32  blkOffset    = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
601                                                                       blkSizeLog2 + 1, // +1 for nibble offset
602                                                                       pIn->x,
603                                                                       pIn->y,
604                                                                       pIn->slice,
605                                                                       0);
606         const UINT_32 xb       = pIn->x / output.metaBlkWidth;
607         const UINT_32 yb       = pIn->y / output.metaBlkHeight;
608         const UINT_32 pb       = output.pitch / output.metaBlkWidth;
609         const UINT_32 blkIndex = (yb * pb) + xb;
610         const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
611 
612         pOut->addr = (output.sliceSize * pIn->slice) +
613                      (blkIndex * (1 << blkSizeLog2)) +
614                      ((blkOffset >> 1) ^ pipeXor);
615         pOut->bitPosition = (blkOffset & 1) << 2;
616     }
617 
618     return returnCode;
619 }
620 
621 /**
622 ************************************************************************************************************************
623 *   Gfx10Lib::HwlComputeHtileAddrFromCoord
624 *
625 *   @brief
626 *       Interface function stub of AddrComputeHtileAddrFromCoord
627 *
628 *   @return
629 *       ADDR_E_RETURNCODE
630 ************************************************************************************************************************
631 */
HwlComputeHtileAddrFromCoord(const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT * pOut)632 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
633     const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
634     ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut)   ///< [out] output structure
635 {
636     ADDR_E_RETURNCODE returnCode = ADDR_OK;
637 
638     if (pIn->numMipLevels > 1)
639     {
640         returnCode = ADDR_NOTIMPLEMENTED;
641     }
642     else
643     {
644         ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};
645         input.size            = sizeof(input);
646         input.hTileFlags      = pIn->hTileFlags;
647         input.depthFlags      = pIn->depthflags;
648         input.swizzleMode     = pIn->swizzleMode;
649         input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
650         input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
651         input.numSlices       = Max(pIn->numSlices,       1u);
652         input.numMipLevels    = 1;
653 
654         ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};
655         output.size = sizeof(output);
656 
657         returnCode = ComputeHtileInfo(&input, &output);
658 
659         if (returnCode == ADDR_OK)
660         {
661             const UINT_32  numSampleLog2 = Log2(pIn->numSamples);
662             const UINT_32  pipeMask      = (1 << m_pipesLog2) - 1;
663             const UINT_32  index         = m_htileBaseIndex + numSampleLog2;
664             const UINT_8*  patIdxTable   = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;
665 
666             const UINT_32  blkSizeLog2   = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;
667             const UINT_32  blkMask       = (1 << blkSizeLog2) - 1;
668             const UINT_32  blkOffset     = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],
669                                                                            blkSizeLog2 + 1, // +1 for nibble offset
670                                                                            pIn->x,
671                                                                            pIn->y,
672                                                                            pIn->slice,
673                                                                            0);
674             const UINT_32 xb       = pIn->x / output.metaBlkWidth;
675             const UINT_32 yb       = pIn->y / output.metaBlkHeight;
676             const UINT_32 pb       = output.pitch / output.metaBlkWidth;
677             const UINT_32 blkIndex = (yb * pb) + xb;
678             const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
679 
680             pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
681                          (blkIndex * (1 << blkSizeLog2)) +
682                          ((blkOffset >> 1) ^ pipeXor);
683         }
684     }
685 
686     return returnCode;
687 }
688 
689 /**
690 ************************************************************************************************************************
691 *   Gfx10Lib::HwlComputeHtileCoordFromAddr
692 *
693 *   @brief
694 *       Interface function stub of AddrComputeHtileCoordFromAddr
695 *
696 *   @return
697 *       ADDR_E_RETURNCODE
698 ************************************************************************************************************************
699 */
HwlComputeHtileCoordFromAddr(const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT * pIn,ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT * pOut)700 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
701     const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
702     ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut)   ///< [out] output structure
703 {
704     ADDR_NOT_IMPLEMENTED();
705 
706     return ADDR_OK;
707 }
708 
709 /**
710 ************************************************************************************************************************
711 *   Gfx10Lib::HwlSupportComputeDccAddrFromCoord
712 *
713 *   @brief
714 *       Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
715 *
716 *   @return
717 *       ADDR_E_RETURNCODE
718 ************************************************************************************************************************
719 */
HwlSupportComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn)720 ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
721     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
722 {
723     ADDR_E_RETURNCODE returnCode = ADDR_OK;
724 
725     if ((pIn->resourceType       != ADDR_RSRC_TEX_2D) ||
726         (pIn->swizzleMode        != ADDR_SW_64KB_R_X) ||
727         (pIn->dccKeyFlags.linear == TRUE)             ||
728         (pIn->numFrags           >  1)                ||
729         (pIn->numMipLevels       >  1)                ||
730         (pIn->mipId              >  0))
731     {
732         returnCode = ADDR_NOTSUPPORTED;
733     }
734     else if ((pIn->pitch == 0)         ||
735              (pIn->metaBlkWidth == 0)  ||
736              (pIn->metaBlkHeight == 0) ||
737              (pIn->slice > 0 && pIn->dccRamSliceSize == 0))
738     {
739         returnCode = ADDR_NOTSUPPORTED;
740     }
741 
742     return returnCode;
743 }
744 
745 /**
746 ************************************************************************************************************************
747 *   Gfx10Lib::HwlComputeDccAddrFromCoord
748 *
749 *   @brief
750 *       Interface function stub of AddrComputeDccAddrFromCoord
751 *
752 *   @return
753 *       N/A
754 ************************************************************************************************************************
755 */
HwlComputeDccAddrFromCoord(const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT * pOut)756 VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
757     const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
758     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
759 {
760     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
761     const UINT_32 numPipeLog2 = m_pipesLog2;
762     const UINT_32 pipeMask    = (1 << numPipeLog2) - 1;
763     UINT_32       index       = m_dccBaseIndex + elemLog2;
764     const UINT_8* patIdxTable;
765 
766     if (m_settings.supportRbPlus)
767     {
768         patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;
769 
770         if (pIn->dccKeyFlags.pipeAligned)
771         {
772             index += MaxNumOfBpp;
773 
774             if (m_numPkrLog2 < 2)
775             {
776                 index += m_pipesLog2 * MaxNumOfBpp;
777             }
778             else
779             {
780                 // 4 groups for "m_numPkrLog2 < 2" case
781                 index += 4 * MaxNumOfBpp;
782 
783                 const UINT_32 dccPipePerPkr = 3;
784 
785                 index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
786                          (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
787             }
788         }
789     }
790     else
791     {
792         patIdxTable = GFX10_DCC_64K_R_X_PATIDX;
793 
794         if (pIn->dccKeyFlags.pipeAligned)
795         {
796             index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
797         }
798         else
799         {
800             index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
801         }
802     }
803 
804     const UINT_32  blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
805     const UINT_32  blkMask     = (1 << blkSizeLog2) - 1;
806     const UINT_32  blkOffset   =
807         ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],
808                                         blkSizeLog2 + 1, // +1 for nibble offset
809                                         pIn->x,
810                                         pIn->y,
811                                         pIn->slice,
812                                         0);
813     const UINT_32 xb       = pIn->x / pIn->metaBlkWidth;
814     const UINT_32 yb       = pIn->y / pIn->metaBlkHeight;
815     const UINT_32 pb       = pIn->pitch / pIn->metaBlkWidth;
816     const UINT_32 blkIndex = (yb * pb) + xb;
817     const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
818 
819     pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
820                  (blkIndex * (1 << blkSizeLog2)) +
821                  ((blkOffset >> 1) ^ pipeXor);
822 }
823 
824 /**
825 ************************************************************************************************************************
826 *   Gfx10Lib::HwlInitGlobalParams
827 *
828 *   @brief
829 *       Initializes global parameters
830 *
831 *   @return
832 *       TRUE if all settings are valid
833 *
834 ************************************************************************************************************************
835 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)836 BOOL_32 Gfx10Lib::HwlInitGlobalParams(
837     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
838 {
839     BOOL_32              valid = TRUE;
840     GB_ADDR_CONFIG_GFX10 gbAddrConfig;
841 
842     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
843 
844     // These values are copied from CModel code
845     switch (gbAddrConfig.bits.NUM_PIPES)
846     {
847         case ADDR_CONFIG_1_PIPE:
848             m_pipes     = 1;
849             m_pipesLog2 = 0;
850             break;
851         case ADDR_CONFIG_2_PIPE:
852             m_pipes     = 2;
853             m_pipesLog2 = 1;
854             break;
855         case ADDR_CONFIG_4_PIPE:
856             m_pipes     = 4;
857             m_pipesLog2 = 2;
858             break;
859         case ADDR_CONFIG_8_PIPE:
860             m_pipes     = 8;
861             m_pipesLog2 = 3;
862             break;
863         case ADDR_CONFIG_16_PIPE:
864             m_pipes     = 16;
865             m_pipesLog2 = 4;
866             break;
867         case ADDR_CONFIG_32_PIPE:
868             m_pipes     = 32;
869             m_pipesLog2 = 5;
870             break;
871         case ADDR_CONFIG_64_PIPE:
872             m_pipes     = 64;
873             m_pipesLog2 = 6;
874             break;
875         default:
876             ADDR_ASSERT_ALWAYS();
877             valid = FALSE;
878             break;
879     }
880 
881     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
882     {
883         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
884             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
885             m_pipeInterleaveLog2  = 8;
886             break;
887         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
888             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
889             m_pipeInterleaveLog2  = 9;
890             break;
891         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
892             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
893             m_pipeInterleaveLog2  = 10;
894             break;
895         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
896             m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
897             m_pipeInterleaveLog2  = 11;
898             break;
899         default:
900             ADDR_ASSERT_ALWAYS();
901             valid = FALSE;
902             break;
903     }
904 
905     // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
906     // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
907     // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
908     ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);
909 
910     switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)
911     {
912         case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:
913             m_maxCompFrag     = 1;
914             m_maxCompFragLog2 = 0;
915             break;
916         case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:
917             m_maxCompFrag     = 2;
918             m_maxCompFragLog2 = 1;
919             break;
920         case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:
921             m_maxCompFrag     = 4;
922             m_maxCompFragLog2 = 2;
923             break;
924         case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:
925             m_maxCompFrag     = 8;
926             m_maxCompFragLog2 = 3;
927             break;
928         default:
929             ADDR_ASSERT_ALWAYS();
930             valid = FALSE;
931             break;
932     }
933 
934     {
935         // Skip unaligned case
936         m_xmaskBaseIndex += MaxNumOfBppCMask;
937         m_htileBaseIndex += MaxNumOfAA;
938 
939         m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask;
940         m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA;
941         m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;
942 
943         if (m_settings.supportRbPlus)
944         {
945             m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;
946             m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;
947 
948             ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));
949 
950             ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
951                           sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));
952 
953             if (m_numPkrLog2 >= 2)
954             {
955                 m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
956                 m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask;
957                 m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
958             }
959         }
960         else
961         {
962             const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
963                                         static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
964                                         1;
965 
966             ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);
967             ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) ==
968                           (numPipeType + 1) * MaxNumOfBppCMask);
969         }
970     }
971 
972     if (m_settings.supportRbPlus)
973     {
974         // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
975         // corresponding SW_64KB_* mode
976         m_blockVarSizeLog2 = m_pipesLog2 + 14;
977     }
978 
979     if (valid)
980     {
981         InitEquationTable();
982     }
983 
984     return valid;
985 }
986 
987 /**
988 ************************************************************************************************************************
989 *   Gfx10Lib::HwlConvertChipFamily
990 *
991 *   @brief
992 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
993 *   @return
994 *       ChipFamily
995 ************************************************************************************************************************
996 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)997 ChipFamily Gfx10Lib::HwlConvertChipFamily(
998     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
999     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1000 {
1001     ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1002 
1003     m_settings.dccUnsup3DSwDis  = 1;
1004     m_settings.dsMipmapHtileFix = 1;
1005 
1006     switch (chipFamily)
1007     {
1008         case FAMILY_NV:
1009             if (ASICREV_IS_NAVI10_P(chipRevision))
1010             {
1011                 m_settings.dsMipmapHtileFix = 0;
1012                 m_settings.isDcn20          = 1;
1013             }
1014 
1015             if (ASICREV_IS_NAVI12_P(chipRevision))
1016             {
1017                 m_settings.isDcn20 = 1;
1018             }
1019 
1020             if (ASICREV_IS_NAVI14_M(chipRevision))
1021             {
1022                 m_settings.isDcn20 = 1;
1023             }
1024 
1025             if (ASICREV_IS_NAVI21_M(chipRevision))
1026             {
1027                 m_settings.supportRbPlus   = 1;
1028                 m_settings.dccUnsup3DSwDis = 0;
1029             }
1030 
1031             if (ASICREV_IS_NAVI22_P(chipRevision))
1032             {
1033                 m_settings.supportRbPlus   = 1;
1034                 m_settings.dccUnsup3DSwDis = 0;
1035             }
1036 
1037             if (ASICREV_IS_NAVI23_P(chipRevision))
1038             {
1039                 m_settings.supportRbPlus   = 1;
1040                 m_settings.dccUnsup3DSwDis = 0;
1041             }
1042 
1043             if (ASICREV_IS_NAVI24_P(chipRevision))
1044             {
1045                 m_settings.supportRbPlus   = 1;
1046                 m_settings.dccUnsup3DSwDis = 0;
1047             }
1048             break;
1049 
1050         case FAMILY_VGH:
1051             if (ASICREV_IS_VANGOGH(chipRevision))
1052             {
1053                 m_settings.supportRbPlus   = 1;
1054                 m_settings.dccUnsup3DSwDis = 0;
1055             }
1056             else
1057             {
1058                 ADDR_ASSERT(!"Unknown chip revision");
1059             }
1060             break;
1061         case FAMILY_RMB:
1062             if (ASICREV_IS_REMBRANDT(chipRevision))
1063             {
1064                 m_settings.supportRbPlus   = 1;
1065                 m_settings.dccUnsup3DSwDis = 0;
1066             }
1067             else
1068             {
1069                 ADDR_ASSERT(!"Unknown chip revision");
1070             }
1071             break;
1072         case FAMILY_RPL:
1073             if (ASICREV_IS_RAPHAEL(chipRevision))
1074             {
1075                 m_settings.supportRbPlus   = 1;
1076                 m_settings.dccUnsup3DSwDis = 0;
1077             }
1078             break;
1079         case FAMILY_MDN:
1080             if (ASICREV_IS_MENDOCINO(chipRevision))
1081             {
1082                 m_settings.supportRbPlus   = 1;
1083                 m_settings.dccUnsup3DSwDis = 0;
1084             }
1085             else
1086             {
1087                 ADDR_ASSERT(!"Unknown chip revision");
1088             }
1089             break;
1090         default:
1091             ADDR_ASSERT(!"Unknown chip family");
1092             break;
1093     }
1094 
1095     m_configFlags.use32bppFor422Fmt = TRUE;
1096 
1097     return family;
1098 }
1099 
1100 /**
1101 ************************************************************************************************************************
1102 *   Gfx10Lib::GetBlk256SizeLog2
1103 *
1104 *   @brief
1105 *       Get block 256 size
1106 *
1107 *   @return
1108 *       N/A
1109 ************************************************************************************************************************
1110 */
GetBlk256SizeLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1111 void Gfx10Lib::GetBlk256SizeLog2(
1112     AddrResourceType resourceType,      ///< [in] Resource type
1113     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1114     UINT_32          elemLog2,          ///< [in] element size log2
1115     UINT_32          numSamplesLog2,    ///< [in] number of samples
1116     Dim3d*           pBlock             ///< [out] block size
1117     ) const
1118 {
1119     if (IsThin(resourceType, swizzleMode))
1120     {
1121         UINT_32 blockBits = 8 - elemLog2;
1122 
1123         if (IsZOrderSwizzle(swizzleMode))
1124         {
1125             blockBits -= numSamplesLog2;
1126         }
1127 
1128         pBlock->w = (blockBits >> 1) + (blockBits & 1);
1129         pBlock->h = (blockBits >> 1);
1130         pBlock->d = 0;
1131     }
1132     else
1133     {
1134         ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1135 
1136         UINT_32 blockBits = 8 - elemLog2;
1137 
1138         pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1139         pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1140         pBlock->h = (blockBits / 3);
1141     }
1142 }
1143 
1144 /**
1145 ************************************************************************************************************************
1146 *   Gfx10Lib::GetCompressedBlockSizeLog2
1147 *
1148 *   @brief
1149 *       Get compress block size
1150 *
1151 *   @return
1152 *       N/A
1153 ************************************************************************************************************************
1154 */
GetCompressedBlockSizeLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,Dim3d * pBlock) const1155 void Gfx10Lib::GetCompressedBlockSizeLog2(
1156     Gfx10DataType    dataType,          ///< [in] Data type
1157     AddrResourceType resourceType,      ///< [in] Resource type
1158     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1159     UINT_32          elemLog2,          ///< [in] element size log2
1160     UINT_32          numSamplesLog2,    ///< [in] number of samples
1161     Dim3d*           pBlock             ///< [out] block size
1162     ) const
1163 {
1164     if (dataType == Gfx10DataColor)
1165     {
1166         GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1167     }
1168     else
1169     {
1170         ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1171         pBlock->w = 3;
1172         pBlock->h = 3;
1173         pBlock->d = 0;
1174     }
1175 }
1176 
1177 /**
1178 ************************************************************************************************************************
1179 *   Gfx10Lib::GetMetaOverlapLog2
1180 *
1181 *   @brief
1182 *       Get meta block overlap
1183 *
1184 *   @return
1185 *       N/A
1186 ************************************************************************************************************************
1187 */
GetMetaOverlapLog2(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2) const1188 INT_32 Gfx10Lib::GetMetaOverlapLog2(
1189     Gfx10DataType    dataType,          ///< [in] Data type
1190     AddrResourceType resourceType,      ///< [in] Resource type
1191     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1192     UINT_32          elemLog2,          ///< [in] element size log2
1193     UINT_32          numSamplesLog2     ///< [in] number of samples
1194     ) const
1195 {
1196     Dim3d compBlock;
1197     Dim3d microBlock;
1198 
1199     GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1200     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1201 
1202     const INT_32 compSizeLog2   = compBlock.w  + compBlock.h  + compBlock.d;
1203     const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1204     const INT_32 maxSizeLog2    = Max(compSizeLog2, blk256SizeLog2);
1205     const INT_32 numPipesLog2   = GetEffectiveNumPipes();
1206     INT_32       overlap        = numPipesLog2 - maxSizeLog2;
1207 
1208     if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1209     {
1210         overlap++;
1211     }
1212 
1213     // In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1214     if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1215     {
1216         overlap--;
1217     }
1218     overlap = Max(overlap, 0);
1219     return overlap;
1220 }
1221 
1222 /**
1223 ************************************************************************************************************************
1224 *   Gfx10Lib::Get3DMetaOverlapLog2
1225 *
1226 *   @brief
1227 *       Get 3d meta block overlap
1228 *
1229 *   @return
1230 *       N/A
1231 ************************************************************************************************************************
1232 */
Get3DMetaOverlapLog2(AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2) const1233 INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1234     AddrResourceType resourceType,      ///< [in] Resource type
1235     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1236     UINT_32          elemLog2           ///< [in] element size log2
1237     ) const
1238 {
1239     Dim3d microBlock;
1240     GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1241 
1242     INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1243 
1244     if (m_settings.supportRbPlus)
1245     {
1246         overlap++;
1247     }
1248 
1249     if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1250     {
1251         overlap = 0;
1252     }
1253     return overlap;
1254 }
1255 
1256 /**
1257 ************************************************************************************************************************
1258 *   Gfx10Lib::GetPipeRotateAmount
1259 *
1260 *   @brief
1261 *       Get pipe rotate amount
1262 *
1263 *   @return
1264 *       Pipe rotate amount
1265 ************************************************************************************************************************
1266 */
1267 
GetPipeRotateAmount(AddrResourceType resourceType,AddrSwizzleMode swizzleMode) const1268 INT_32 Gfx10Lib::GetPipeRotateAmount(
1269     AddrResourceType resourceType,      ///< [in] Resource type
1270     AddrSwizzleMode  swizzleMode        ///< [in] Swizzle mode
1271     ) const
1272 {
1273     INT_32 amount = 0;
1274 
1275     if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1276     {
1277         amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1278                  1 : m_pipesLog2 - (m_numSaLog2 + 1);
1279     }
1280 
1281     return amount;
1282 }
1283 
1284 /**
1285 ************************************************************************************************************************
1286 *   Gfx10Lib::GetMetaBlkSize
1287 *
1288 *   @brief
1289 *       Get metadata block size
1290 *
1291 *   @return
1292 *       Meta block size
1293 ************************************************************************************************************************
1294 */
GetMetaBlkSize(Gfx10DataType dataType,AddrResourceType resourceType,AddrSwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numSamplesLog2,BOOL_32 pipeAlign,Dim3d * pBlock) const1295 UINT_32 Gfx10Lib::GetMetaBlkSize(
1296     Gfx10DataType    dataType,          ///< [in] Data type
1297     AddrResourceType resourceType,      ///< [in] Resource type
1298     AddrSwizzleMode  swizzleMode,       ///< [in] Swizzle mode
1299     UINT_32          elemLog2,          ///< [in] element size log2
1300     UINT_32          numSamplesLog2,    ///< [in] number of samples
1301     BOOL_32          pipeAlign,         ///< [in] pipe align
1302     Dim3d*           pBlock             ///< [out] block size
1303     ) const
1304 {
1305     INT_32 metablkSizeLog2;
1306 
1307     {
1308         const INT_32 metaElemSizeLog2   = GetMetaElementSizeLog2(dataType);
1309         const INT_32 metaCacheSizeLog2  = GetMetaCacheSizeLog2(dataType);
1310         const INT_32 compBlkSizeLog2    = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1311         const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1312                                           numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1313         const INT_32 dataBlkSizeLog2    = GetBlockSizeLog2(swizzleMode);
1314         INT_32       numPipesLog2       = m_pipesLog2;
1315 
1316         if (IsThin(resourceType, swizzleMode))
1317         {
1318             if ((pipeAlign == FALSE) ||
1319                 (IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1320                 (IsDisplaySwizzle(resourceType, swizzleMode)  == TRUE))
1321             {
1322                 if (pipeAlign)
1323                 {
1324                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1325                     metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1326                 }
1327                 else
1328                 {
1329                     metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1330                 }
1331             }
1332             else
1333             {
1334                 if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1335                 {
1336                     numPipesLog2++;
1337                 }
1338 
1339                 INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1340 
1341                 if (numPipesLog2 >= 4)
1342                 {
1343                     INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1344 
1345                     // In 16Bpe 8xaa, we have an extra overlap bit
1346                     if ((pipeRotateLog2 > 0)  &&
1347                         (elemLog2 == 4)       &&
1348                         (numSamplesLog2 == 3) &&
1349                         (IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1350                     {
1351                         overlapLog2++;
1352                     }
1353 
1354                     metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1355                     metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1356 
1357                     if (m_settings.supportRbPlus    &&
1358                         IsRtOptSwizzle(swizzleMode) &&
1359                         (numPipesLog2 == 6)         &&
1360                         (numSamplesLog2 == 3)       &&
1361                         (m_maxCompFragLog2 == 3)    &&
1362                         (metablkSizeLog2 < 15))
1363                     {
1364                         metablkSizeLog2 = 15;
1365                     }
1366                 }
1367                 else
1368                 {
1369                     metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1370                 }
1371 
1372                 if (dataType == Gfx10DataDepthStencil)
1373                 {
1374                     // For htile surfaces, pad meta block size to 2K * num_pipes
1375                     metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1376                 }
1377 
1378                 const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1379 
1380                 if  (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1381                 {
1382                     const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1383 
1384                     metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1385                 }
1386             }
1387 
1388             const INT_32 metablkBitsLog2 =
1389                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1390             pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1391             pBlock->h = 1 << (metablkBitsLog2 >> 1);
1392             pBlock->d = 1;
1393         }
1394         else
1395         {
1396             ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1397 
1398             if (pipeAlign)
1399             {
1400                 if (m_settings.supportRbPlus         &&
1401                     (m_pipesLog2 == m_numSaLog2 + 1) &&
1402                     (m_pipesLog2 > 1)                &&
1403                     IsRbAligned(resourceType, swizzleMode))
1404                 {
1405                     numPipesLog2++;
1406                 }
1407 
1408                 const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1409 
1410                 metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1411                 metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1412                 metablkSizeLog2 = Max(metablkSizeLog2, 12);
1413             }
1414             else
1415             {
1416                 metablkSizeLog2 = 12;
1417             }
1418 
1419             const INT_32 metablkBitsLog2 =
1420                 metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1421             pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1422             pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1423             pBlock->d = 1 << (metablkBitsLog2 / 3);
1424         }
1425     }
1426 
1427     return (1 << static_cast<UINT_32>(metablkSizeLog2));
1428 }
1429 
1430 /**
1431 ************************************************************************************************************************
1432 *   Gfx10Lib::ConvertSwizzlePatternToEquation
1433 *
1434 *   @brief
1435 *       Convert swizzle pattern to equation.
1436 *
1437 *   @return
1438 *       N/A
1439 ************************************************************************************************************************
1440 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,AddrResourceType rsrcType,AddrSwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const1441 VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1442     UINT_32                elemLog2,  ///< [in] element bytes log2
1443     AddrResourceType       rsrcType,  ///< [in] resource type
1444     AddrSwizzleMode        swMode,    ///< [in] swizzle mode
1445     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
1446     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
1447     const
1448 {
         // Overview: *pEquation is cleared and then filled, one entry per address bit of the block
         // (numBits = GetBlockSizeLog2(swMode)). Channel numbers written below are 0 = x, 1 = y, 2 = z.
         // Three cases are handled:
         //   1. Non-XOR swizzle modes: each bit at or above elemLog2 is exactly one x, y or z coordinate bit.
         //   2. XOR modes on thin resources (IsThin): addr terms come from x/y, with extra terms in xor1/xor2.
         //   3. Remaining XOR modes: as case 2 but with z handled like x and y, using the Block4K/Block64K
         //      3D dimension tables.
         // NOTE(review): presumably ADDR_BIT_SETTING::value aliases the x/y/z/s fields, so IsPow2(value) means
         // "exactly one coordinate bit is set" - confirm against the type definition.
1449     // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list
1450     ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT];
1451     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1452 
1453     const ADDR_BIT_SETTING* pSwizzle      = fullSwizzlePattern;
1454     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode);
1455     memset(pEquation, 0, sizeof(ADDR_EQUATION));
1456     pEquation->numBits            = blockSizeLog2;
1457     pEquation->numBitComponents   = pPatInfo->maxItemCount;
1458     pEquation->stackedDepthSlices = FALSE;
1459 
         // Address bits below elemLog2 map straight to channel 0 with index equal to the bit position.
         // (All x-channel indices written later are offset by elemLog2 for the same reason.)
1460     for (UINT_32 i = 0; i < elemLog2; i++)
1461     {
1462         pEquation->addr[i].channel = 0;
1463         pEquation->addr[i].valid   = 1;
1464         pEquation->addr[i].index   = i;
1465     }
1466 
1467     if (IsXor(swMode) == FALSE)
1468     {
             // Case 1: no XOR terms. Every pattern entry must be a single coordinate bit; it becomes the
             // addr term and xor1/xor2 are explicitly zeroed.
1469         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1470         {
1471             ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1472 
1473             if (pSwizzle[i].x != 0)
1474             {
1475                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1476 
1477                 pEquation->addr[i].channel = 0;
1478                 pEquation->addr[i].valid   = 1;
1479                 pEquation->addr[i].index   = Log2(pSwizzle[i].x) + elemLog2;
1480             }
1481             else if (pSwizzle[i].y != 0)
1482             {
1483                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1484 
1485                 pEquation->addr[i].channel = 1;
1486                 pEquation->addr[i].valid   = 1;
1487                 pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1488             }
1489             else
1490             {
1491                 ADDR_ASSERT(pSwizzle[i].z != 0);
1492                 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1493 
1494                 pEquation->addr[i].channel = 2;
1495                 pEquation->addr[i].valid   = 1;
1496                 pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1497             }
1498 
1499             pEquation->xor1[i].value = 0;
1500             pEquation->xor2[i].value = 0;
1501         }
1502     }
1503     else if (IsThin(rsrcType, swMode))
1504     {
             // Case 2: XOR mode, thin resource. Block width/height come from ComputeThinBlockDimension for
             // this element size (bpp = 8 << elemLog2).
1505         Dim3d dim;
1506         ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1507 
1508         const UINT_32 blkXLog2 = Log2(dim.w);
1509         const UINT_32 blkYLog2 = Log2(dim.h);
1510         const UINT_32 blkXMask = dim.w - 1;
1511         const UINT_32 blkYMask = dim.h - 1;
1512 
             // swizzle[] : per-bit remainder of the pattern still to be resolved in pass 2.
             // xMask/yMask: coordinate bits already emitted as an addr term.
             // bMask     : address bits that are fully resolved (bits below elemLog2 start out resolved).
1513         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1514         UINT_32          xMask = 0;
1515         UINT_32          yMask = 0;
1516         UINT_32          bMask = (1 << elemLog2) - 1;
1517 
             // Pass 1: classify every pattern entry.
1518         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1519         {
1520             if (IsPow2(pSwizzle[i].value))
1521             {
                     // Single coordinate bit: it is the addr term for this address bit.
1522                 if (pSwizzle[i].x != 0)
1523                 {
1524                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1525                     xMask |= pSwizzle[i].x;
1526 
1527                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1528 
1529                     ADDR_ASSERT(xLog2 < blkXLog2);
1530 
1531                     pEquation->addr[i].channel = 0;
1532                     pEquation->addr[i].valid   = 1;
1533                     pEquation->addr[i].index   = xLog2 + elemLog2;
1534                 }
1535                 else
1536                 {
1537                     ADDR_ASSERT(pSwizzle[i].y != 0);
1538                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1539                     yMask |= pSwizzle[i].y;
1540 
1541                     pEquation->addr[i].channel = 1;
1542                     pEquation->addr[i].valid   = 1;
1543                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1544 
1545                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1546                 }
1547 
1548                 swizzle[i].value = 0;
1549                 bMask |= 1 << i;
1550             }
1551             else
1552             {
                     // Multi-term entry. A z term, if present, goes directly into xor2; only x and y are
                     // carried into swizzle[] for further processing.
1553                 if (pSwizzle[i].z != 0)
1554                 {
1555                     ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1556 
1557                     pEquation->xor2[i].channel = 2;
1558                     pEquation->xor2[i].valid   = 1;
1559                     pEquation->xor2[i].index   = Log2(pSwizzle[i].z);
1560                 }
1561 
1562                 swizzle[i].x = pSwizzle[i].x;
1563                 swizzle[i].y = pSwizzle[i].y;
1564                 swizzle[i].z = swizzle[i].s = 0;
1565 
1566                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1567 
                     // Coordinate bits at or beyond the block dimension are moved out into xor1/xor2 and
                     // stripped from the remainder.
1568                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1569 
1570                 if (xHi != 0)
1571                 {
1572                     ADDR_ASSERT(IsPow2(xHi));
1573                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1574 
1575                     pEquation->xor1[i].channel = 0;
1576                     pEquation->xor1[i].valid   = 1;
1577                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1578 
1579                     swizzle[i].x &= blkXMask;
1580                 }
1581 
1582                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1583 
1584                 if (yHi != 0)
1585                 {
1586                     ADDR_ASSERT(IsPow2(yHi));
1587 
                         // xor1 is taken by xHi when xHi != 0; otherwise yHi uses xor1.
1588                     if (xHi == 0)
1589                     {
1590                         ADDR_ASSERT(pEquation->xor1[i].value == 0);
1591                         pEquation->xor1[i].channel = 1;
1592                         pEquation->xor1[i].valid   = 1;
1593                         pEquation->xor1[i].index   = Log2(yHi);
1594                     }
1595                     else
1596                     {
1597                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1598                         pEquation->xor2[i].channel = 1;
1599                         pEquation->xor2[i].valid   = 1;
1600                         pEquation->xor2[i].index   = Log2(yHi);
1601                     }
1602 
1603                     swizzle[i].y &= blkYMask;
1604                 }
1605 
                     // Nothing left inside the block: this bit needs no pass-2 work.
1606                 if (swizzle[i].value == 0)
1607                 {
1608                     bMask |= 1 << i;
1609                 }
1610             }
1611         }
1612 
1613         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1614         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1615 
             // All bits below the pipe interleave must already be resolved; pass 2 only scans bits from
             // m_pipeInterleaveLog2 upwards.
1616         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1617 
             // Pass 2: sweep the unresolved bits repeatedly until all are resolved. A remainder that has
             // shrunk to one coordinate bit becomes the addr term (and is recorded in xMask/yMask); otherwise
             // coordinate bits already recorded in xMask/yMask are peeled off into xor1/xor2.
             // NOTE(review): termination relies on every sweep making progress; presumably guaranteed by
             // the pattern tables - confirm.
1618         while (bMask != blockMask)
1619         {
1620             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1621             {
1622                 if ((bMask & (1 << i)) == 0)
1623                 {
1624                     if (IsPow2(swizzle[i].value))
1625                     {
1626                         if (swizzle[i].x != 0)
1627                         {
1628                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1629                             xMask |= swizzle[i].x;
1630 
1631                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1632 
1633                             ADDR_ASSERT(xLog2 < blkXLog2);
1634 
1635                             pEquation->addr[i].channel = 0;
1636                             pEquation->addr[i].valid   = 1;
1637                             pEquation->addr[i].index   = xLog2 + elemLog2;
1638                         }
1639                         else
1640                         {
1641                             ADDR_ASSERT(swizzle[i].y != 0);
1642                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1643                             yMask |= swizzle[i].y;
1644 
1645                             pEquation->addr[i].channel = 1;
1646                             pEquation->addr[i].valid   = 1;
1647                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1648 
1649                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1650                         }
1651 
1652                         swizzle[i].value = 0;
1653                         bMask |= 1 << i;
1654                     }
1655                     else
1656                     {
                             // x / y: the part of the remainder already used as an addr term elsewhere.
                             // Each goes to xor1 if free, else to xor2 (which must then be free).
1657                         const UINT_32 x = swizzle[i].x & xMask;
1658                         const UINT_32 y = swizzle[i].y & yMask;
1659 
1660                         if (x != 0)
1661                         {
1662                             ADDR_ASSERT(IsPow2(x));
1663 
1664                             if (pEquation->xor1[i].value == 0)
1665                             {
1666                                 pEquation->xor1[i].channel = 0;
1667                                 pEquation->xor1[i].valid   = 1;
1668                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1669                             }
1670                             else
1671                             {
1672                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1673                                 pEquation->xor2[i].channel = 0;
1674                                 pEquation->xor2[i].valid   = 1;
1675                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1676                             }
1677                         }
1678 
1679                         if (y != 0)
1680                         {
1681                             ADDR_ASSERT(IsPow2(y));
1682 
1683                             if (pEquation->xor1[i].value == 0)
1684                             {
1685                                 pEquation->xor1[i].channel = 1;
1686                                 pEquation->xor1[i].valid   = 1;
1687                                 pEquation->xor1[i].index   = Log2(y);
1688                             }
1689                             else
1690                             {
1691                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1692                                 pEquation->xor2[i].channel = 1;
1693                                 pEquation->xor2[i].valid   = 1;
1694                                 pEquation->xor2[i].index   = Log2(y);
1695                             }
1696                         }
1697 
1698                         swizzle[i].x &= ~x;
1699                         swizzle[i].y &= ~y;
1700                     }
1701                 }
1702             }
1703         }
1704 
             // Every in-block x and y coordinate bit must have been used as an addr term exactly once.
1705         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1706     }
1707     else
1708     {
             // Case 3: XOR mode, non-thin resource. Block dimensions are looked up per element size in the
             // 4KB table when blockSizeLog2 == 12, otherwise in the 64KB table.
1709         const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1710         const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1711         const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1712         const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1713         const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1714         const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1715 
             // Same bookkeeping as the thin case, with an additional zMask for the z coordinate.
1716         ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1717         UINT_32          xMask = 0;
1718         UINT_32          yMask = 0;
1719         UINT_32          zMask = 0;
1720         UINT_32          bMask = (1 << elemLog2) - 1;
1721 
             // Pass 1: classify every pattern entry.
1722         for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1723         {
1724             if (IsPow2(pSwizzle[i].value))
1725             {
1726                 if (pSwizzle[i].x != 0)
1727                 {
1728                     ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1729                     xMask |= pSwizzle[i].x;
1730 
1731                     const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1732 
1733                     ADDR_ASSERT(xLog2 < blkXLog2);
1734 
1735                     pEquation->addr[i].channel = 0;
1736                     pEquation->addr[i].valid   = 1;
1737                     pEquation->addr[i].index   = xLog2 + elemLog2;
1738                 }
1739                 else if (pSwizzle[i].y != 0)
1740                 {
1741                     ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1742                     yMask |= pSwizzle[i].y;
1743 
1744                     pEquation->addr[i].channel = 1;
1745                     pEquation->addr[i].valid   = 1;
1746                     pEquation->addr[i].index   = Log2(pSwizzle[i].y);
1747 
1748                     ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1749                 }
1750                 else
1751                 {
1752                     ADDR_ASSERT(pSwizzle[i].z != 0);
1753                     ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1754                     zMask |= pSwizzle[i].z;
1755 
1756                     pEquation->addr[i].channel = 2;
1757                     pEquation->addr[i].valid   = 1;
1758                     pEquation->addr[i].index   = Log2(pSwizzle[i].z);
1759 
1760                     ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1761                 }
1762 
1763                 swizzle[i].value = 0;
1764                 bMask |= 1 << i;
1765             }
1766             else
1767             {
                     // Multi-term entry: x, y and z are all carried into swizzle[] (unlike the thin case,
                     // z is not written to xor2 up front).
1768                 swizzle[i].x = pSwizzle[i].x;
1769                 swizzle[i].y = pSwizzle[i].y;
1770                 swizzle[i].z = pSwizzle[i].z;
1771                 swizzle[i].s = 0;
1772 
1773                 ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1774 
1775                 const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1776                 const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1777                 const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1778 
                     // Only two xor slots exist, so at most two of the three "Hi" parts may be non-zero.
1779                 ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1780 
1781                 if (xHi != 0)
1782                 {
1783                     ADDR_ASSERT(IsPow2(xHi));
1784                     ADDR_ASSERT(pEquation->xor1[i].value == 0);
1785 
1786                     pEquation->xor1[i].channel = 0;
1787                     pEquation->xor1[i].valid   = 1;
1788                     pEquation->xor1[i].index   = Log2(xHi) + elemLog2;
1789 
1790                     swizzle[i].x &= blkXMask;
1791                 }
1792 
1793                 if (yHi != 0)
1794                 {
1795                     ADDR_ASSERT(IsPow2(yHi));
1796 
1797                     if (pEquation->xor1[i].value == 0)
1798                     {
1799                         pEquation->xor1[i].channel = 1;
1800                         pEquation->xor1[i].valid   = 1;
1801                         pEquation->xor1[i].index   = Log2(yHi);
1802                     }
1803                     else
1804                     {
1805                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1806                         pEquation->xor2[i].channel = 1;
1807                         pEquation->xor2[i].valid   = 1;
1808                         pEquation->xor2[i].index   = Log2(yHi);
1809                     }
1810 
1811                     swizzle[i].y &= blkYMask;
1812                 }
1813 
1814                 if (zHi != 0)
1815                 {
1816                     ADDR_ASSERT(IsPow2(zHi));
1817 
1818                     if (pEquation->xor1[i].value == 0)
1819                     {
1820                         pEquation->xor1[i].channel = 2;
1821                         pEquation->xor1[i].valid   = 1;
1822                         pEquation->xor1[i].index   = Log2(zHi);
1823                     }
1824                     else
1825                     {
1826                         ADDR_ASSERT(pEquation->xor2[i].value == 0);
1827                         pEquation->xor2[i].channel = 2;
1828                         pEquation->xor2[i].valid   = 1;
1829                         pEquation->xor2[i].index   = Log2(zHi);
1830                     }
1831 
1832                     swizzle[i].z &= blkZMask;
1833                 }
1834 
1835                 if (swizzle[i].value == 0)
1836                 {
1837                     bMask |= 1 << i;
1838                 }
1839             }
1840         }
1841 
1842         const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1843         const UINT_32 blockMask   = (1 << blockSizeLog2) - 1;
1844 
1845         ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1846 
             // Pass 2: same iterative resolution as the thin case, extended with the z coordinate.
             // NOTE(review): termination relies on every sweep making progress; presumably guaranteed by
             // the pattern tables - confirm.
1847         while (bMask != blockMask)
1848         {
1849             for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1850             {
1851                 if ((bMask & (1 << i)) == 0)
1852                 {
1853                     if (IsPow2(swizzle[i].value))
1854                     {
1855                         if (swizzle[i].x != 0)
1856                         {
1857                             ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1858                             xMask |= swizzle[i].x;
1859 
1860                             const UINT_32 xLog2 = Log2(swizzle[i].x);
1861 
1862                             ADDR_ASSERT(xLog2 < blkXLog2);
1863 
1864                             pEquation->addr[i].channel = 0;
1865                             pEquation->addr[i].valid   = 1;
1866                             pEquation->addr[i].index   = xLog2 + elemLog2;
1867                         }
1868                         else if (swizzle[i].y != 0)
1869                         {
1870                             ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1871                             yMask |= swizzle[i].y;
1872 
1873                             pEquation->addr[i].channel = 1;
1874                             pEquation->addr[i].valid   = 1;
1875                             pEquation->addr[i].index   = Log2(swizzle[i].y);
1876 
1877                             ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1878                         }
1879                         else
1880                         {
1881                             ADDR_ASSERT(swizzle[i].z != 0);
1882                             ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1883                             zMask |= swizzle[i].z;
1884 
1885                             pEquation->addr[i].channel = 2;
1886                             pEquation->addr[i].valid   = 1;
1887                             pEquation->addr[i].index   = Log2(swizzle[i].z);
1888 
1889                             ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1890                         }
1891 
1892                         swizzle[i].value = 0;
1893                         bMask |= 1 << i;
1894                     }
1895                     else
1896                     {
                             // Peel off coordinate bits that are already addr terms elsewhere; each goes to
                             // xor1 if it is still empty, else to xor2.
1897                         const UINT_32 x = swizzle[i].x & xMask;
1898                         const UINT_32 y = swizzle[i].y & yMask;
1899                         const UINT_32 z = swizzle[i].z & zMask;
1900 
1901                         if (x != 0)
1902                         {
1903                             ADDR_ASSERT(IsPow2(x));
1904 
1905                             if (pEquation->xor1[i].value == 0)
1906                             {
1907                                 pEquation->xor1[i].channel = 0;
1908                                 pEquation->xor1[i].valid   = 1;
1909                                 pEquation->xor1[i].index   = Log2(x) + elemLog2;
1910                             }
1911                             else
1912                             {
1913                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1914                                 pEquation->xor2[i].channel = 0;
1915                                 pEquation->xor2[i].valid   = 1;
1916                                 pEquation->xor2[i].index   = Log2(x) + elemLog2;
1917                             }
1918                         }
1919 
1920                         if (y != 0)
1921                         {
1922                             ADDR_ASSERT(IsPow2(y));
1923 
1924                             if (pEquation->xor1[i].value == 0)
1925                             {
1926                                 pEquation->xor1[i].channel = 1;
1927                                 pEquation->xor1[i].valid   = 1;
1928                                 pEquation->xor1[i].index   = Log2(y);
1929                             }
1930                             else
1931                             {
1932                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1933                                 pEquation->xor2[i].channel = 1;
1934                                 pEquation->xor2[i].valid   = 1;
1935                                 pEquation->xor2[i].index   = Log2(y);
1936                             }
1937                         }
1938 
1939                         if (z != 0)
1940                         {
1941                             ADDR_ASSERT(IsPow2(z));
1942 
1943                             if (pEquation->xor1[i].value == 0)
1944                             {
1945                                 pEquation->xor1[i].channel = 2;
1946                                 pEquation->xor1[i].valid   = 1;
1947                                 pEquation->xor1[i].index   = Log2(z);
1948                             }
1949                             else
1950                             {
1951                                 ADDR_ASSERT(pEquation->xor2[i].value == 0);
1952                                 pEquation->xor2[i].channel = 2;
1953                                 pEquation->xor2[i].valid   = 1;
1954                                 pEquation->xor2[i].index   = Log2(z);
1955                             }
1956                         }
1957 
1958                         swizzle[i].x &= ~x;
1959                         swizzle[i].y &= ~y;
1960                         swizzle[i].z &= ~z;
1961                     }
1962                 }
1963             }
1964         }
1965 
             // Every in-block x, y and z coordinate bit must have been used as an addr term exactly once.
1966         ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1967     }
1968 }
1969 
1970 /**
1971 ************************************************************************************************************************
1972 *   Gfx10Lib::InitEquationTable
1973 *
1974 *   @brief
1975 *       Initialize Equation table.
1976 *
1977 *   @return
1978 *       N/A
1979 ************************************************************************************************************************
1980 */
InitEquationTable()1981 VOID Gfx10Lib::InitEquationTable()
1982 {
1983     memset(m_equationTable, 0, sizeof(m_equationTable));
1984 
1985     // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D)
1986     // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at
1987     // computing 2D resources.
1988     for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1989     {
1990         // Add offset. Start iterating from ADDR_RSRC_TEX_2D
1991         const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1992 
1993         // Iterate through the maximum number of swizzlemodes a type can hold
1994         for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1995         {
1996             const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1997 
1998             // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp)
1999             for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
2000             {
2001                 UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
2002                 // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially
2003                 // overwriting the choice.
2004                 const ADDR_SW_PATINFO* pPatInfo      = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
2005 
2006                 if (pPatInfo != NULL)
2007                 {
2008                     ADDR_ASSERT(IsValidSwMode(swMode));
2009                     if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex
2010                     {
2011                         ADDR_EQUATION equation = {};
2012 
2013                         // Passing in pPatInfo to get the addr equation
2014                         ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
2015 
2016                         equationIndex = m_numEquations;
2017                         ADDR_ASSERT(equationIndex < EquationTableSize);
2018                         // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo
2019                         m_equationTable[equationIndex] = equation;
2020                         // Increment m_numEquations
2021                         m_numEquations++;
2022                     }
2023                     else // There is no equationIndex
2024                     {
2025                         // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2026                         ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2027                         ADDR_ASSERT(rsrcTypeIdx == 1);
2028                         ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2029                         ADDR_ASSERT(m_settings.supportRbPlus == 1);
2030                     }
2031                 }
2032                 // equationIndex, which is used to look up equations in m_equationTable, will be cached for every
2033                 // iteration in this nested for-loop
2034                 m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2035             }
2036         }
2037     }
2038 }
2039 
2040 /**
2041 ************************************************************************************************************************
2042 *   Gfx10Lib::HwlGetEquationIndex
2043 *
2044 *   @brief
2045 *       Interface function stub of GetEquationIndex
2046 *
2047 *   @return
2048 *       ADDR_E_RETURNCODE
2049 ************************************************************************************************************************
2050 */
HwlGetEquationIndex(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const2051 UINT_32 Gfx10Lib::HwlGetEquationIndex(
2052     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
2053     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
2054     ) const
2055 {
2056     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
2057 
2058     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
2059         (pIn->resourceType == ADDR_RSRC_TEX_3D))
2060     {
2061         const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
2062         const UINT_32 swModeIdx   = static_cast<UINT_32>(pIn->swizzleMode);
2063         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
2064 
2065         equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];
2066     }
2067 
2068     if (pOut->pMipInfo != NULL)
2069     {
2070         for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
2071         {
2072             pOut->pMipInfo[i].equationIndex = equationIdx;
2073         }
2074     }
2075 
2076     return equationIdx;
2077 }
2078 
2079 /**
2080 ************************************************************************************************************************
2081 *   Gfx10Lib::GetValidDisplaySwizzleModes
2082 *
2083 *   @brief
2084 *       Get valid swizzle modes mask for displayable surface
2085 *
2086 *   @return
2087 *       Valid swizzle modes mask for displayable surface
2088 ************************************************************************************************************************
2089 */
GetValidDisplaySwizzleModes(UINT_32 bpp) const2090 UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
2091     UINT_32 bpp
2092     ) const
2093 {
2094     UINT_32 swModeMask = 0;
2095 
2096     if (bpp <= 64)
2097     {
2098         if (m_settings.isDcn20)
2099         {
2100             swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
2101         }
2102         else
2103         {
2104             swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
2105         }
2106     }
2107 
2108     return swModeMask;
2109 }
2110 
2111 /**
2112 ************************************************************************************************************************
2113 *   Gfx10Lib::IsValidDisplaySwizzleMode
2114 *
2115 *   @brief
2116 *       Check if a swizzle mode is supported by display engine
2117 *
2118 *   @return
2119 *       TRUE is swizzle mode is supported by display engine
2120 ************************************************************************************************************************
2121 */
IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2122 BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
2123     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2124     ) const
2125 {
2126     ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
2127 
2128     return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;
2129 }
2130 
2131 /**
2132 ************************************************************************************************************************
2133 *   Gfx10Lib::GetMaxNumMipsInTail
2134 *
2135 *   @brief
2136 *       Return max number of mips in tails
2137 *
2138 *   @return
2139 *       Max number of mips in tails
2140 ************************************************************************************************************************
2141 */
GetMaxNumMipsInTail(UINT_32 blockSizeLog2,BOOL_32 isThin) const2142 UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
2143     UINT_32 blockSizeLog2,     ///< block size log2
2144     BOOL_32 isThin             ///< is thin or thick
2145     ) const
2146 {
2147     UINT_32 effectiveLog2 = blockSizeLog2;
2148 
2149     if (isThin == FALSE)
2150     {
2151         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
2152     }
2153 
2154     return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
2155 }
2156 
2157 /**
2158 ************************************************************************************************************************
2159 *   Gfx10Lib::HwlComputePipeBankXor
2160 *
2161 *   @brief
2162 *       Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2163 *
2164 *   @return
2165 *       PipeBankXor value
2166 ************************************************************************************************************************
2167 */
HwlComputePipeBankXor(const ADDR2_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const2168 ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
2169     const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
2170     ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
2171     ) const
2172 {
2173     if (IsNonPrtXor(pIn->swizzleMode))
2174     {
2175         const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));
2176 
2177         // No pipe xor...
2178         const UINT_32 pipeXor = 0;
2179         UINT_32       bankXor = 0;
2180 
2181         const UINT_32         XorPatternLen = 8;
2182         static const UINT_32  XorBankRot1b[XorPatternLen] = {0,  1,  0,  1,  0,  1,  0,  1};
2183         static const UINT_32  XorBankRot2b[XorPatternLen] = {0,  2,  1,  3,  2,  0,  3,  1};
2184         static const UINT_32  XorBankRot3b[XorPatternLen] = {0,  4,  2,  6,  1,  5,  3,  7};
2185         static const UINT_32  XorBankRot4b[XorPatternLen] = {0,  8,  4, 12,  2, 10,  6, 14};
2186         static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};
2187 
2188         switch (bankBits)
2189         {
2190             case 1:
2191             case 2:
2192             case 3:
2193             case 4:
2194                 bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);
2195                 break;
2196             default:
2197                 // valid bank bits should be 0~4
2198                 ADDR_ASSERT_ALWAYS();
2199             case 0:
2200                 break;
2201         }
2202 
2203         pOut->pipeBankXor = bankXor | pipeXor;
2204     }
2205     else
2206     {
2207         pOut->pipeBankXor = 0;
2208     }
2209 
2210     return ADDR_OK;
2211 }
2212 
2213 /**
2214 ************************************************************************************************************************
2215 *   Gfx10Lib::HwlComputeSlicePipeBankXor
2216 *
2217 *   @brief
2218 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2219 *
2220 *   @return
2221 *       PipeBankXor value
2222 ************************************************************************************************************************
2223 */
HwlComputeSlicePipeBankXor(const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const2224 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
2225     const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
2226     ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
2227     ) const
2228 {
2229     if (IsNonPrtXor(pIn->swizzleMode))
2230     {
2231         const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);
2232         const UINT_32 pipeBits  = GetPipeXorBits(blockBits);
2233         const UINT_32 pipeXor   = ReverseBitVector(pIn->slice, pipeBits);
2234 
2235         pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;
2236 
2237         if (pIn->bpe != 0)
2238         {
2239             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
2240                                                                     pIn->resourceType,
2241                                                                     Log2(pIn->bpe >> 3),
2242                                                                     1);
2243 
2244             if (pPatInfo != NULL)
2245             {
2246                 ADDR_BIT_SETTING fullSwizzlePattern[20];
2247                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
2248 
2249                 const UINT_32 pipeBankXorOffset =
2250                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
2251                                                     blockBits,
2252                                                     0,
2253                                                     0,
2254                                                     pIn->slice,
2255                                                     0);
2256 
2257                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
2258 
2259                 // Should have no bit set under pipe interleave
2260                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
2261 
2262                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
2263             }
2264         }
2265     }
2266     else
2267     {
2268         pOut->pipeBankXor = 0;
2269     }
2270 
2271     return ADDR_OK;
2272 }
2273 
2274 /**
2275 ************************************************************************************************************************
2276 *   Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2277 *
2278 *   @brief
2279 *       Compute sub resource offset to support swizzle pattern
2280 *
2281 *   @return
2282 *       Offset
2283 ************************************************************************************************************************
2284 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const2285 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
2286     const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
2287     ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
2288     ) const
2289 {
2290     ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));
2291 
2292     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
2293 
2294     return ADDR_OK;
2295 }
2296 
2297 /**
2298 ************************************************************************************************************************
2299 *   Gfx10Lib::HwlComputeNonBlockCompressedView
2300 *
2301 *   @brief
2302 *       Compute non-block-compressed view for a given mipmap level/slice.
2303 *
2304 *   @return
2305 *       ADDR_E_RETURNCODE
2306 ************************************************************************************************************************
2307 */
HwlComputeNonBlockCompressedView(const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const2308 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
2309     const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
2310     ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
2311     ) const
2312 {
2313     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2314 
2315     if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE)
2316     {
2317         // Only thin swizzle mode can have a NonBC view...
2318         returnCode = ADDR_INVALIDPARAMS;
2319     }
2320     else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
2321              ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
2322     {
2323         // Only support BC1~BC7, ASTC, or ETC2 for now...
2324         returnCode = ADDR_NOTSUPPORTED;
2325     }
2326     else
2327     {
2328         UINT_32 bcWidth, bcHeight;
2329         UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
2330 
2331         ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
2332         infoIn.flags        = pIn->flags;
2333         infoIn.swizzleMode  = pIn->swizzleMode;
2334         infoIn.resourceType = pIn->resourceType;
2335         infoIn.bpp          = bpp;
2336         infoIn.width        = RoundUpQuotient(pIn->width, bcWidth);
2337         infoIn.height       = RoundUpQuotient(pIn->height, bcHeight);
2338         infoIn.numSlices    = pIn->numSlices;
2339         infoIn.numMipLevels = pIn->numMipLevels;
2340         infoIn.numSamples   = 1;
2341         infoIn.numFrags     = 1;
2342 
2343         ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};
2344         ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
2345 
2346         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
2347         infoOut.pMipInfo = mipInfo;
2348 
2349         const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;
2350 
2351         if (tiled)
2352         {
2353             returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
2354         }
2355         else
2356         {
2357             returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
2358         }
2359 
2360         if (returnCode == ADDR_OK)
2361         {
2362             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
2363             subOffIn.swizzleMode      = infoIn.swizzleMode;
2364             subOffIn.resourceType     = infoIn.resourceType;
2365             subOffIn.slice            = pIn->slice;
2366             subOffIn.sliceSize        = infoOut.sliceSize;
2367             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
2368             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
2369 
2370             ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
2371 
2372             // For any mipmap level, move nonBc view base address by offset
2373             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
2374             pOut->offset = subOffOut.offset;
2375 
2376             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
2377             slicePbXorIn.bpe             = infoIn.bpp;
2378             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
2379             slicePbXorIn.resourceType    = infoIn.resourceType;
2380             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
2381             slicePbXorIn.slice           = pIn->slice;
2382 
2383             ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
2384 
2385             // For any mipmap level, nonBc view should use computed pbXor
2386             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
2387             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
2388 
2389             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
2390             const UINT_32 requestMipWidth  = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth);
2391             const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight);
2392 
2393             if (inTail)
2394             {
2395                 // For mipmap level that is in mip tail block, hack a lot of things...
2396                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
2397                 // are fit in tail block:
2398 
2399                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
2400                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
2401 
2402                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
2403                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
2404 
2405                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
2406                 pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);
2407 
2408                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
2409                 pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
2410             }
2411             // This check should cover at least mipId == 0
2412             else if (requestMipWidth << pIn->mipId == infoIn.width)
2413             {
2414                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
2415                 // - only one mipmap level and mipId = 0
2416                 pOut->mipId        = 0;
2417                 pOut->numMipLevels = 1;
2418 
2419                 // (mip0) width = requestMipWidth
2420                 pOut->unalignedWidth = requestMipWidth;
2421 
2422                 // (mip0) height = requestMipHeight
2423                 pOut->unalignedHeight = requestMipHeight;
2424             }
2425             else
2426             {
2427                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
2428                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
2429                 // because single mip view may have different pitch value than original (multiple) mip view...
2430                 // A simple case would be:
2431                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
2432                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
2433                 //   mip0 width = 0x101/mip1 width = 0x80
2434                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
2435                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
2436 
2437                 // - 2 levels and mipId = 1
2438                 pOut->mipId        = 1;
2439                 pOut->numMipLevels = 2;
2440 
2441                 const UINT_32 upperMipWidth  = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth);
2442                 const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight);
2443 
2444                 const BOOL_32 needToAvoidInTail =
2445                     tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
2446                     TRUE : FALSE;
2447 
2448                 const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
2449                 const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);
2450 
2451                 const BOOL_32 needExtraWidth =
2452                     ((upperMipWidth < requestMipWidth * 2) ||
2453                      ((upperMipWidth == requestMipWidth * 2) &&
2454                       ((needToAvoidInTail == TRUE) ||
2455                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;
2456 
2457                 const BOOL_32 needExtraHeight =
2458                     ((upperMipHeight < requestMipHeight * 2) ||
2459                      ((upperMipHeight == requestMipHeight * 2) &&
2460                       ((needToAvoidInTail == TRUE) ||
2461                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;
2462 
2463                 // (mip0) width = requestLastMipLevelWidth
2464                 pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);
2465 
2466                 // (mip0) height = requestLastMipLevelHeight
2467                 pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
2468             }
2469 
2470             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
2471             ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
2472             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
2473             ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
2474         }
2475     }
2476 
2477     return returnCode;
2478 }
2479 
2480 /**
2481 ************************************************************************************************************************
2482 *   Gfx10Lib::ValidateNonSwModeParams
2483 *
2484 *   @brief
2485 *       Validate compute surface info params except swizzle mode
2486 *
2487 *   @return
2488 *       TRUE if parameters are valid, FALSE otherwise
2489 ************************************************************************************************************************
2490 */
ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2491 BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
2492     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2493 {
2494     BOOL_32 valid = TRUE;
2495 
2496     if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))
2497     {
2498         ADDR_ASSERT_ALWAYS();
2499         valid = FALSE;
2500     }
2501 
2502     if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
2503     {
2504         ADDR_ASSERT_ALWAYS();
2505         valid = FALSE;
2506     }
2507 
2508     const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
2509     const AddrResourceType    rsrcType = pIn->resourceType;
2510     const BOOL_32             mipmap   = (pIn->numMipLevels > 1);
2511     const BOOL_32             msaa     = (pIn->numFrags > 1);
2512     const BOOL_32             display  = flags.display;
2513     const BOOL_32             tex3d    = IsTex3d(rsrcType);
2514     const BOOL_32             tex2d    = IsTex2d(rsrcType);
2515     const BOOL_32             tex1d    = IsTex1d(rsrcType);
2516     const BOOL_32             stereo   = flags.qbStereo;
2517 
2518     // Resource type check
2519     if (tex1d)
2520     {
2521         if (msaa || display || stereo)
2522         {
2523             ADDR_ASSERT_ALWAYS();
2524             valid = FALSE;
2525         }
2526     }
2527     else if (tex2d)
2528     {
2529         if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))
2530         {
2531             ADDR_ASSERT_ALWAYS();
2532             valid = FALSE;
2533         }
2534     }
2535     else if (tex3d)
2536     {
2537         if (msaa || display || stereo)
2538         {
2539             ADDR_ASSERT_ALWAYS();
2540             valid = FALSE;
2541         }
2542     }
2543     else
2544     {
2545         ADDR_ASSERT_ALWAYS();
2546         valid = FALSE;
2547     }
2548 
2549     return valid;
2550 }
2551 
2552 /**
2553 ************************************************************************************************************************
2554 *   Gfx10Lib::ValidateSwModeParams
2555 *
2556 *   @brief
2557 *       Validate compute surface info related to swizzle mode
2558 *
2559 *   @return
2560 *       TRUE if parameters are valid, FALSE otherwise
2561 ************************************************************************************************************************
2562 */
ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2563 BOOL_32 Gfx10Lib::ValidateSwModeParams(
2564     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
2565 {
2566     BOOL_32 valid = TRUE;
2567 
2568     if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
2569     {
2570         ADDR_ASSERT_ALWAYS();
2571         valid = FALSE;
2572     }
2573     else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
2574     {
2575         {
2576             ADDR_ASSERT_ALWAYS();
2577             valid = FALSE;
2578         }
2579     }
2580 
2581     const ADDR2_SURFACE_FLAGS flags       = pIn->flags;
2582     const AddrResourceType    rsrcType    = pIn->resourceType;
2583     const AddrSwizzleMode     swizzle     = pIn->swizzleMode;
2584     const BOOL_32             msaa        = (pIn->numFrags > 1);
2585     const BOOL_32             zbuffer     = flags.depth || flags.stencil;
2586     const BOOL_32             color       = flags.color;
2587     const BOOL_32             display     = flags.display;
2588     const BOOL_32             tex3d       = IsTex3d(rsrcType);
2589     const BOOL_32             tex2d       = IsTex2d(rsrcType);
2590     const BOOL_32             tex1d       = IsTex1d(rsrcType);
2591     const BOOL_32             thin3d      = flags.view3dAs2dArray;
2592     const BOOL_32             linear      = IsLinear(swizzle);
2593     const BOOL_32             blk256B     = IsBlock256b(swizzle);
2594     const BOOL_32             blkVar      = IsBlockVariable(swizzle);
2595     const BOOL_32             isNonPrtXor = IsNonPrtXor(swizzle);
2596     const BOOL_32             prt         = flags.prt;
2597     const BOOL_32             fmask       = flags.fmask;
2598 
2599     // Misc check
2600     if ((pIn->numFrags > 1) &&
2601         (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
2602     {
2603         // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
2604         ADDR_ASSERT_ALWAYS();
2605         valid = FALSE;
2606     }
2607 
2608     if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
2609     {
2610         ADDR_ASSERT_ALWAYS();
2611         valid = FALSE;
2612     }
2613 
2614     if ((pIn->bpp == 96) && (linear == FALSE))
2615     {
2616         ADDR_ASSERT_ALWAYS();
2617         valid = FALSE;
2618     }
2619 
2620     const UINT_32 swizzleMask = 1 << swizzle;
2621 
2622     // Resource type check
2623     if (tex1d)
2624     {
2625         if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
2626         {
2627             ADDR_ASSERT_ALWAYS();
2628             valid = FALSE;
2629         }
2630     }
2631     else if (tex2d)
2632     {
2633         if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)
2634         {
2635             {
2636                 ADDR_ASSERT_ALWAYS();
2637                 valid = FALSE;
2638             }
2639         }
2640         else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
2641                  (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
2642         {
2643             ADDR_ASSERT_ALWAYS();
2644             valid = FALSE;
2645         }
2646     }
2647     else if (tex3d)
2648     {
2649         if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
2650             (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
2651             (thin3d && ((swizzleMask & Gfx10Rsrc3dViewAs2dSwModeMask) == 0)))
2652         {
2653             ADDR_ASSERT_ALWAYS();
2654             valid = FALSE;
2655         }
2656     }
2657 
2658     // Swizzle type check
2659     if (linear)
2660     {
2661         if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
2662         {
2663             ADDR_ASSERT_ALWAYS();
2664             valid = FALSE;
2665         }
2666     }
2667     else if (IsZOrderSwizzle(swizzle))
2668     {
2669         if ((pIn->bpp > 64)                         ||
2670             (msaa && (color || (pIn->bpp > 32)))    ||
2671             ElemLib::IsBlockCompressed(pIn->format) ||
2672             ElemLib::IsMacroPixelPacked(pIn->format))
2673         {
2674             ADDR_ASSERT_ALWAYS();
2675             valid = FALSE;
2676         }
2677     }
2678     else if (IsStandardSwizzle(rsrcType, swizzle))
2679     {
2680         if (zbuffer || msaa)
2681         {
2682             ADDR_ASSERT_ALWAYS();
2683             valid = FALSE;
2684         }
2685     }
2686     else if (IsDisplaySwizzle(rsrcType, swizzle))
2687     {
2688         if (zbuffer || msaa)
2689         {
2690             ADDR_ASSERT_ALWAYS();
2691             valid = FALSE;
2692         }
2693     }
2694     else if (IsRtOptSwizzle(swizzle))
2695     {
2696         if (zbuffer)
2697         {
2698             ADDR_ASSERT_ALWAYS();
2699             valid = FALSE;
2700         }
2701     }
2702     else
2703     {
2704         {
2705             ADDR_ASSERT_ALWAYS();
2706             valid = FALSE;
2707         }
2708     }
2709 
2710     // Block type check
2711     if (blk256B)
2712     {
2713         if (zbuffer || tex3d || msaa)
2714         {
2715             ADDR_ASSERT_ALWAYS();
2716             valid = FALSE;
2717         }
2718     }
2719     else if (blkVar)
2720     {
2721         if (m_blockVarSizeLog2 == 0)
2722         {
2723             ADDR_ASSERT_ALWAYS();
2724             valid = FALSE;
2725         }
2726     }
2727 
2728     return valid;
2729 }
2730 
2731 /**
2732 ************************************************************************************************************************
2733 *   Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2734 *
2735 *   @brief
2736 *       Compute surface info sanity check
2737 *
2738 *   @return
2739 *       Offset
2740 ************************************************************************************************************************
2741 */
HwlComputeSurfaceInfoSanityCheck(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn) const2742 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
2743     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
2744     ) const
2745 {
2746     return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;
2747 }
2748 
2749 /**
2750 ************************************************************************************************************************
2751 *   Gfx10Lib::HwlGetPreferredSurfaceSetting
2752 *
2753 *   @brief
2754 *       Internal function to get suggested surface information for client to use
2755 *
2756 *   @return
2757 *       ADDR_E_RETURNCODE
2758 ************************************************************************************************************************
2759 */
HwlGetPreferredSurfaceSetting(const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT * pIn,ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT * pOut) const2760 ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2761     const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,  ///< [in] input structure
2762     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT*      pOut  ///< [out] output structure
2763     ) const
2764 {
2765     ADDR_E_RETURNCODE returnCode = ADDR_OK;
2766 
2767     if (pIn->flags.fmask)
2768     {
2769         const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2770         const BOOL_32 forbidVarBlockType  = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2771 
2772         if (forbid64KbBlockType && forbidVarBlockType)
2773         {
2774             // Invalid combination...
2775             ADDR_ASSERT_ALWAYS();
2776             returnCode = ADDR_INVALIDPARAMS;
2777         }
2778         else
2779         {
2780             pOut->resourceType                   = ADDR_RSRC_TEX_2D;
2781             pOut->validBlockSet.value            = 0;
2782             pOut->validBlockSet.macroThin64KB    = forbid64KbBlockType ? 0 : 1;
2783             pOut->validBlockSet.var              = forbidVarBlockType  ? 0 : 1;
2784             pOut->validSwModeSet.value           = 0;
2785             pOut->validSwModeSet.sw64KB_Z_X      = forbid64KbBlockType ? 0 : 1;
2786             pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType  ? 0 : 1;
2787             pOut->canXor                         = TRUE;
2788             pOut->validSwTypeSet.value           = AddrSwSetZ;
2789             pOut->clientPreferredSwSet           = pOut->validSwTypeSet;
2790 
2791             BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2792 
2793             if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2794             {
2795                 const UINT_8  maxFmaskSwizzleModeType = 2;
2796                 const UINT_32 ratioLow                = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2797                 const UINT_32 ratioHi                 = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2798                 const UINT_32 fmaskBpp                = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2799                 const UINT_32 numSlices               = Max(pIn->numSlices, 1u);
2800                 const UINT_32 width                   = Max(pIn->width, 1u);
2801                 const UINT_32 height                  = Max(pIn->height, 1u);
2802                 const UINT_64 sizeAlignInElement      = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2803 
2804                 AddrSwizzleMode swMode[maxFmaskSwizzleModeType]  = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2805                 Dim3d           blkDim[maxFmaskSwizzleModeType]  = {{}, {}};
2806                 Dim3d           padDim[maxFmaskSwizzleModeType]  = {{}, {}};
2807                 UINT_64         padSize[maxFmaskSwizzleModeType] = {};
2808 
2809                 for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2810                 {
2811                     ComputeBlockDimensionForSurf(&blkDim[i].w,
2812                                                  &blkDim[i].h,
2813                                                  &blkDim[i].d,
2814                                                  fmaskBpp,
2815                                                  1,
2816                                                  pOut->resourceType,
2817                                                  swMode[i]);
2818 
2819                     padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2820                     padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2821                 }
2822 
2823                 if (Addr2BlockTypeWithinMemoryBudget(padSize[0],
2824                                                 padSize[1],
2825                                                 ratioLow,
2826                                                 ratioHi,
2827                                                 pIn->memoryBudget,
2828                                                 GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2829                 {
2830                     use64KbBlockType = FALSE;
2831                 }
2832             }
2833             else if (forbidVarBlockType)
2834             {
2835                 use64KbBlockType = TRUE;
2836             }
2837 
2838             if (use64KbBlockType)
2839             {
2840                 pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2841             }
2842             else
2843             {
2844                 pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2845             }
2846         }
2847     }
2848     else
2849     {
2850         UINT_32 bpp    = pIn->bpp;
2851         UINT_32 width  = Max(pIn->width, 1u);
2852         UINT_32 height = Max(pIn->height, 1u);
2853 
2854         // Set format to INVALID will skip this conversion
2855         if (pIn->format != ADDR_FMT_INVALID)
2856         {
2857             ElemMode elemMode = ADDR_UNCOMPRESSED;
2858             UINT_32 expandX, expandY;
2859 
2860             // Get compression/expansion factors and element mode which indicates compression/expansion
2861             bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2862                                                 &elemMode,
2863                                                 &expandX,
2864                                                 &expandY);
2865 
2866             UINT_32 basePitch = 0;
2867             GetElemLib()->AdjustSurfaceInfo(elemMode,
2868                                             expandX,
2869                                             expandY,
2870                                             &bpp,
2871                                             &basePitch,
2872                                             &width,
2873                                             &height);
2874         }
2875 
2876         const UINT_32 numSlices    = Max(pIn->numSlices,    1u);
2877         const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2878         const UINT_32 numSamples   = Max(pIn->numSamples,   1u);
2879         const UINT_32 numFrags     = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2880         const BOOL_32 msaa         = (numFrags > 1) || (numSamples > 1);
2881 
2882         // Pre sanity check on non swizzle mode parameters
2883         ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2884         localIn.flags        = pIn->flags;
2885         localIn.resourceType = pIn->resourceType;
2886         localIn.format       = pIn->format;
2887         localIn.bpp          = bpp;
2888         localIn.width        = width;
2889         localIn.height       = height;
2890         localIn.numSlices    = numSlices;
2891         localIn.numMipLevels = numMipLevels;
2892         localIn.numSamples   = numSamples;
2893         localIn.numFrags     = numFrags;
2894 
2895         if (ValidateNonSwModeParams(&localIn))
2896         {
2897             // Forbid swizzle mode(s) by client setting
2898             ADDR2_SWMODE_SET allowedSwModeSet = {};
2899             allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2900             allowedSwModeSet.value |= pIn->forbiddenBlock.micro  ? 0 : Gfx10Blk256BSwModeMask;
2901             allowedSwModeSet.value |=
2902                 pIn->forbiddenBlock.macroThin4KB ? 0 :
2903                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2904             allowedSwModeSet.value |=
2905                 pIn->forbiddenBlock.macroThick4KB ? 0 :
2906                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2907             allowedSwModeSet.value |=
2908                 pIn->forbiddenBlock.macroThin64KB ? 0 :
2909                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2910             allowedSwModeSet.value |=
2911                 pIn->forbiddenBlock.macroThick64KB ? 0 :
2912                 ((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2913             allowedSwModeSet.value |=
2914                 pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2915 
2916             if (pIn->preferredSwSet.value != 0)
2917             {
2918                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2919                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2920                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2921                 allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2922             }
2923 
2924             if (pIn->noXor)
2925             {
2926                 allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2927             }
2928 
2929             if (pIn->maxAlign > 0)
2930             {
2931                 if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2932                 {
2933                     allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2934                 }
2935 
2936                 if (pIn->maxAlign < Size64K)
2937                 {
2938                     allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2939                 }
2940 
2941                 if (pIn->maxAlign < Size4K)
2942                 {
2943                     allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2944                 }
2945 
2946                 if (pIn->maxAlign < Size256)
2947                 {
2948                     allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2949                 }
2950             }
2951 
2952             // Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2953             switch (pIn->resourceType)
2954             {
2955                 case ADDR_RSRC_TEX_1D:
2956                     allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2957                     break;
2958 
2959                 case ADDR_RSRC_TEX_2D:
2960                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2961                     break;
2962 
2963                 case ADDR_RSRC_TEX_3D:
2964                     allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2965 
2966                     if (pIn->flags.view3dAs2dArray)
2967                     {
2968                         // SW_LINEAR can be used for 3D thin images, including BCn image format.
2969                         allowedSwModeSet.value &= Gfx10Rsrc3dViewAs2dSwModeMask;
2970                     }
2971                     break;
2972 
2973                 default:
2974                     ADDR_ASSERT_ALWAYS();
2975                     allowedSwModeSet.value = 0;
2976                     break;
2977             }
2978 
2979             if (ElemLib::IsBlockCompressed(pIn->format)  ||
2980                 ElemLib::IsMacroPixelPacked(pIn->format) ||
2981                 (bpp > 64)                               ||
2982                 (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2983             {
2984                 allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2985             }
2986 
2987             if (pIn->format == ADDR_FMT_32_32_32)
2988             {
2989                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2990             }
2991 
2992             if (msaa)
2993             {
2994                 allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2995             }
2996 
2997             if (pIn->flags.depth || pIn->flags.stencil)
2998             {
2999                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3000             }
3001 
3002             if (pIn->flags.display)
3003             {
3004                 allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
3005             }
3006 
3007             if (allowedSwModeSet.value != 0)
3008             {
3009 #if DEBUG
3010                 // Post sanity check, at least AddrLib should accept the output generated by its own
3011                 UINT_32 validateSwModeSet = allowedSwModeSet.value;
3012 
3013                 for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3014                 {
3015                     if (validateSwModeSet & 1)
3016                     {
3017                         localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3018                         ADDR_ASSERT(ValidateSwModeParams(&localIn));
3019                     }
3020 
3021                     validateSwModeSet >>= 1;
3022                 }
3023 #endif
3024 
3025                 pOut->resourceType   = pIn->resourceType;
3026                 pOut->validSwModeSet = allowedSwModeSet;
3027                 pOut->canXor         = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3028                 pOut->validBlockSet  = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3029                 pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3030 
3031                 pOut->clientPreferredSwSet = pIn->preferredSwSet;
3032 
3033                 if (pOut->clientPreferredSwSet.value == 0)
3034                 {
3035                     pOut->clientPreferredSwSet.value = AddrSwSetAll;
3036                 }
3037 
3038                 // Apply optional restrictions
3039                 if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3040                 {
3041                     if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3042                     {
3043                         // MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3044                         // the GL2 in VAR mode, so it should be avoided.
3045                         allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3046                     }
3047                     else
3048                     {
3049                         // We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3050                         // But we have to suffer from low performance because there is no other choice...
3051                         ADDR_ASSERT_ALWAYS();
3052                     }
3053                 }
3054 
3055                 if (pIn->flags.needEquation)
3056                 {
3057                     UINT_32 components = pIn->flags.allowExtEquation ?  ADDR_MAX_EQUATION_COMP :
3058                                                                         ADDR_MAX_LEGACY_EQUATION_COMP;
3059                     FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components);
3060                 }
3061 
3062                 if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3063                 {
3064                     pOut->swizzleMode = ADDR_SW_LINEAR;
3065                 }
3066                 else
3067                 {
3068                     const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3069 
3070                     if ((height > 1) && (computeMinSize == FALSE))
3071                     {
3072                         // Always ignore linear swizzle mode if:
3073                         // 1. This is a (2D/3D) resource with height > 1
3074                         // 2. Client doesn't require computing minimize size
3075                         allowedSwModeSet.swLinear = 0;
3076                     }
3077 
3078                     // A bitfield where each bit represents a block type. Each swizzle mode maps to a block.
3079                     ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3080 
3081                     // Determine block size if there are 2 or more block type candidates
3082                     if (IsPow2(allowedBlockSet.value) == FALSE)
3083                     {
3084                         // Tracks a valid SwizzleMode for each valid block type
3085                         AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3086 
3087                         swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3088 
3089                         if (m_blockVarSizeLog2 != 0)
3090                         {
3091                             swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3092                         }
3093 
3094                         if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3095                         {
3096                             swMode[AddrBlockThick4KB]  = ADDR_SW_4KB_S;
3097                             swMode[AddrBlockThin64KB]  = ADDR_SW_64KB_R_X;
3098                             swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3099                         }
3100                         else
3101                         {
3102                             swMode[AddrBlockMicro]    = ADDR_SW_256B_S;
3103                             swMode[AddrBlockThin4KB]  = ADDR_SW_4KB_S;
3104                             swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3105                         }
3106 
3107                         // Tracks the size of each valid swizzle mode's surface in bytes
3108                         UINT_64 padSize[AddrBlockMaxTiledType] = {};
3109 
3110                         const UINT_32 ratioLow           = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3111                         const UINT_32 ratioHi            = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3112                         const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3113                         UINT_32       minSizeBlk         = AddrBlockMicro; // Tracks the most optimal block to use
3114                         UINT_64       minSize            = 0;              // Tracks the minimum acceptable block type
3115 
3116                         ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3117 
3118                         // Iterate through all block types
3119                         for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3120                         {
3121                             if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3122                             {
3123                                 localIn.swizzleMode = swMode[i];
3124 
3125                                 if (localIn.swizzleMode == ADDR_SW_LINEAR)
3126                                 {
3127                                     returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3128                                 }
3129                                 else
3130                                 {
3131                                     returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3132                                 }
3133 
3134                                 if (returnCode == ADDR_OK)
3135                                 {
3136                                     padSize[i] = localOut.surfSize;
3137 
3138                                     if (minSize == 0)
3139                                     {
3140                                         minSize    = padSize[i];
3141                                         minSizeBlk = i;
3142                                     }
3143                                     else
3144                                     {
3145                                         // Checks if the block type is within the memory budget but favors larger blocks
3146                                         if (Addr2BlockTypeWithinMemoryBudget(
3147                                                 minSize,
3148                                                 padSize[i],
3149                                                 ratioLow,
3150                                                 ratioHi,
3151                                                 0.0,
3152                                                 GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3153                                         {
3154                                             minSize    = padSize[i];
3155                                             minSizeBlk = i;
3156                                         }
3157                                     }
3158                                 }
3159                                 else
3160                                 {
3161                                     ADDR_ASSERT_ALWAYS();
3162                                     break;
3163                                 }
3164                             }
3165                         }
3166 
3167                         if (pIn->memoryBudget > 1.0)
3168                         {
3169                             // If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3170                             // smaller-block type again in coming loop
3171                             switch (minSizeBlk)
3172                             {
3173                                 case AddrBlockThick64KB:
3174                                     allowedBlockSet.macroThin64KB = 0;
3175                                 case AddrBlockThinVar:
3176                                 case AddrBlockThin64KB:
3177                                     allowedBlockSet.macroThick4KB = 0;
3178                                 case AddrBlockThick4KB:
3179                                     allowedBlockSet.macroThin4KB = 0;
3180                                 case AddrBlockThin4KB:
3181                                     allowedBlockSet.micro  = 0;
3182                                 case AddrBlockMicro:
3183                                     allowedBlockSet.linear = 0;
3184                                 case AddrBlockLinear:
3185                                     break;
3186 
3187                                 default:
3188                                     ADDR_ASSERT_ALWAYS();
3189                                     break;
3190                             }
3191 
3192                             for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3193                             {
3194                                 if ((i != minSizeBlk) &&
3195                                     Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i)))
3196                                 {
3197                                     if (Addr2BlockTypeWithinMemoryBudget(
3198                                             minSize,
3199                                             padSize[i],
3200                                             0,
3201                                             0,
3202                                             pIn->memoryBudget,
3203                                             GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3204                                     {
3205                                         // Clear the block type if the memory waste is unacceptable
3206                                         allowedBlockSet.value &= ~(1u << (i - 1));
3207                                     }
3208                                 }
3209                             }
3210 
3211                             // Remove VAR block type if bigger block type is allowed
3212                             if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3213                             {
3214                                 if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3215                                 {
3216                                     allowedBlockSet.var = 0;
3217                                 }
3218                             }
3219 
3220                             // Remove linear block type if 2 or more block types are allowed
3221                             if (IsPow2(allowedBlockSet.value) == FALSE)
3222                             {
3223                                 allowedBlockSet.linear = 0;
3224                             }
3225 
3226                             // Select the biggest allowed block type
3227                             minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3228 
3229                             if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3230                             {
3231                                 minSizeBlk = AddrBlockLinear;
3232                             }
3233                         }
3234 
3235                         switch (minSizeBlk)
3236                         {
3237                             case AddrBlockLinear:
3238                                 allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3239                                 break;
3240 
3241                             case AddrBlockMicro:
3242                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3243                                 allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3244                                 break;
3245 
3246                             case AddrBlockThin4KB:
3247                                 ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3248                                 allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3249                                 break;
3250 
3251                             case AddrBlockThick4KB:
3252                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3253                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3254                                 break;
3255 
3256                             case AddrBlockThin64KB:
3257                                 allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3258                                                           Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3259                                 break;
3260 
3261                             case AddrBlockThick64KB:
3262                                 ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3263                                 allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3264                                 break;
3265 
3266                             case AddrBlockThinVar:
3267                                 allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3268                                 break;
3269 
3270                             default:
3271                                 ADDR_ASSERT_ALWAYS();
3272                                 allowedSwModeSet.value = 0;
3273                                 break;
3274                         }
3275                     }
3276 
3277                     // Block type should be determined.
3278                     ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3279 
3280                     ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3281 
3282                     // Determine swizzle type if there are 2 or more swizzle type candidates
3283                     if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3284                     {
3285                         if (ElemLib::IsBlockCompressed(pIn->format))
3286                         {
3287                             if (allowedSwSet.sw_D)
3288                             {
3289                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3290                             }
3291                             else if (allowedSwSet.sw_S)
3292                             {
3293                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3294                             }
3295                             else
3296                             {
3297                                 ADDR_ASSERT(allowedSwSet.sw_R);
3298                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3299                             }
3300                         }
3301                         else if (ElemLib::IsMacroPixelPacked(pIn->format))
3302                         {
3303                             if (allowedSwSet.sw_S)
3304                             {
3305                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3306                             }
3307                             else if (allowedSwSet.sw_D)
3308                             {
3309                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3310                             }
3311                             else
3312                             {
3313                                 ADDR_ASSERT(allowedSwSet.sw_R);
3314                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3315                             }
3316                         }
3317                         else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3318                         {
3319                             if (pIn->flags.color &&
3320                                 GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3321                                 allowedSwSet.sw_D)
3322                             {
3323                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3324                             }
3325                             else if (allowedSwSet.sw_S)
3326                             {
3327                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3328                             }
3329                             else if (allowedSwSet.sw_R)
3330                             {
3331                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3332                             }
3333                             else
3334                             {
3335                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3336                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3337                             }
3338                         }
3339                         else
3340                         {
3341                             if (allowedSwSet.sw_R)
3342                             {
3343                                 allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3344                             }
3345                             else if (allowedSwSet.sw_D)
3346                             {
3347                                 allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3348                             }
3349                             else if (allowedSwSet.sw_S)
3350                             {
3351                                 allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3352                             }
3353                             else
3354                             {
3355                                 ADDR_ASSERT(allowedSwSet.sw_Z);
3356                                 allowedSwModeSet.value &= Gfx10ZSwModeMask;
3357                             }
3358                         }
3359 
3360                         // Swizzle type should be determined.
3361                         ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3362                     }
3363 
3364                     // Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3365                     // swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3366                     // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3367                     pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3368                 }
3369             }
3370             else
3371             {
3372                 // Invalid combination...
3373                 ADDR_ASSERT_ALWAYS();
3374                 returnCode = ADDR_INVALIDPARAMS;
3375             }
3376         }
3377         else
3378         {
3379             // Invalid combination...
3380             ADDR_ASSERT_ALWAYS();
3381             returnCode = ADDR_INVALIDPARAMS;
3382         }
3383     }
3384 
3385     return returnCode;
3386 }
3387 
3388 /**
3389 ************************************************************************************************************************
3390 *   Gfx10Lib::ComputeStereoInfo
3391 *
3392 *   @brief
3393 *       Compute height alignment and right eye pipeBankXor for stereo surface
3394 *
3395 *   @return
3396 *       Error code
3397 *
3398 ************************************************************************************************************************
3399 */
ComputeStereoInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const3400 ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
3401     const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
3402     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
3403     UINT_32*                                pRightXor   ///< Right eye xor
3404     ) const
3405 {
3406     ADDR_E_RETURNCODE ret = ADDR_OK;
3407 
3408     *pRightXor = 0;
3409 
3410     if (IsNonPrtXor(pIn->swizzleMode))
3411     {
3412         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3413         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
3414         const UINT_32 rsrcType    = static_cast<UINT_32>(pIn->resourceType) - 1;
3415         const UINT_32 swMode      = static_cast<UINT_32>(pIn->swizzleMode);
3416         const UINT_32 eqIndex     = m_equationLookupTable[rsrcType][swMode][elemLog2];
3417 
3418         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
3419         {
3420             UINT_32 yMax     = 0;
3421             UINT_32 yPosMask = 0;
3422 
3423             // First get "max y bit"
3424             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3425             {
3426                 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
3427 
3428                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3429                     (m_equationTable[eqIndex].addr[i].index > yMax))
3430                 {
3431                     yMax = m_equationTable[eqIndex].addr[i].index;
3432                 }
3433 
3434                 if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3435                     (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3436                     (m_equationTable[eqIndex].xor1[i].index > yMax))
3437                 {
3438                     yMax = m_equationTable[eqIndex].xor1[i].index;
3439                 }
3440 
3441                 if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3442                     (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3443                     (m_equationTable[eqIndex].xor2[i].index > yMax))
3444                 {
3445                     yMax = m_equationTable[eqIndex].xor2[i].index;
3446                 }
3447             }
3448 
3449             // Then loop again for populating a position mask of "max Y bit"
3450             for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
3451             {
3452                 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
3453                     (m_equationTable[eqIndex].addr[i].index == yMax))
3454                 {
3455                     yPosMask |= 1u << i;
3456                 }
3457                 else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&
3458                          (m_equationTable[eqIndex].xor1[i].channel == 1) &&
3459                          (m_equationTable[eqIndex].xor1[i].index == yMax))
3460                 {
3461                     yPosMask |= 1u << i;
3462                 }
3463                 else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&
3464                          (m_equationTable[eqIndex].xor2[i].channel == 1) &&
3465                          (m_equationTable[eqIndex].xor2[i].index == yMax))
3466                 {
3467                     yPosMask |= 1u << i;
3468                 }
3469             }
3470 
3471             const UINT_32 additionalAlign = 1 << yMax;
3472 
3473             if (additionalAlign >= *pAlignY)
3474             {
3475                 *pAlignY = additionalAlign;
3476 
3477                 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
3478 
3479                 if ((alignedHeight >> yMax) & 1)
3480                 {
3481                     *pRightXor = yPosMask >> m_pipeInterleaveLog2;
3482                 }
3483             }
3484         }
3485         else
3486         {
3487             ret = ADDR_INVALIDPARAMS;
3488         }
3489     }
3490 
3491     return ret;
3492 }
3493 
3494 /**
3495 ************************************************************************************************************************
3496 *   Gfx10Lib::HwlComputeSurfaceInfoTiled
3497 *
3498 *   @brief
3499 *       Internal function to calculate alignment for tiled surface
3500 *
3501 *   @return
3502 *       ADDR_E_RETURNCODE
3503 ************************************************************************************************************************
3504 */
HwlComputeSurfaceInfoTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3505 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
3506      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3507      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3508      ) const
3509 {
3510     ADDR_E_RETURNCODE ret;
3511 
3512     // Mip chain dimesion and epitch has no meaning in GFX10, set to default value
3513     pOut->mipChainPitch    = 0;
3514     pOut->mipChainHeight   = 0;
3515     pOut->mipChainSlice    = 0;
3516     pOut->epitchIsHeight   = FALSE;
3517 
3518     // Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary
3519     pOut->mipChainInTail   = FALSE;
3520     pOut->firstMipIdInTail = pIn->numMipLevels;
3521 
3522     if (IsBlock256b(pIn->swizzleMode))
3523     {
3524         ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);
3525     }
3526     else
3527     {
3528         ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);
3529     }
3530 
3531     return ret;
3532 }
3533 
3534 /**
3535 ************************************************************************************************************************
3536 *   Gfx10Lib::ComputeSurfaceInfoMicroTiled
3537 *
3538 *   @brief
3539 *       Internal function to calculate alignment for micro tiled surface
3540 *
3541 *   @return
3542 *       ADDR_E_RETURNCODE
3543 ************************************************************************************************************************
3544 */
ComputeSurfaceInfoMicroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3545 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
3546      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3547      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3548      ) const
3549 {
3550     ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3551                                                          &pOut->blockHeight,
3552                                                          &pOut->blockSlices,
3553                                                          pIn->bpp,
3554                                                          pIn->numFrags,
3555                                                          pIn->resourceType,
3556                                                          pIn->swizzleMode);
3557 
3558     if (ret == ADDR_OK)
3559     {
3560         const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);
3561 
3562         pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
3563         pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
3564         pOut->numSlices = pIn->numSlices;
3565         pOut->baseAlign = blockSize;
3566 
3567         if (pIn->numMipLevels > 1)
3568         {
3569             const UINT_32 mip0Width    = pIn->width;
3570             const UINT_32 mip0Height   = pIn->height;
3571             UINT_64       mipSliceSize = 0;
3572 
3573             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
3574             {
3575                 UINT_32 mipWidth, mipHeight;
3576 
3577                 GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);
3578 
3579                 const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
3580                 const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);
3581 
3582                 if (pOut->pMipInfo != NULL)
3583                 {
3584                     pOut->pMipInfo[i].pitch            = mipActualWidth;
3585                     pOut->pMipInfo[i].height           = mipActualHeight;
3586                     pOut->pMipInfo[i].depth            = 1;
3587                     pOut->pMipInfo[i].offset           = mipSliceSize;
3588                     pOut->pMipInfo[i].mipTailOffset    = 0;
3589                     pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
3590                 }
3591 
3592                 mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);
3593             }
3594 
3595             pOut->sliceSize = mipSliceSize;
3596             pOut->surfSize  = mipSliceSize * pOut->numSlices;
3597         }
3598         else
3599         {
3600             pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
3601             pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3602 
3603             if (pOut->pMipInfo != NULL)
3604             {
3605                 pOut->pMipInfo[0].pitch            = pOut->pitch;
3606                 pOut->pMipInfo[0].height           = pOut->height;
3607                 pOut->pMipInfo[0].depth            = 1;
3608                 pOut->pMipInfo[0].offset           = 0;
3609                 pOut->pMipInfo[0].mipTailOffset    = 0;
3610                 pOut->pMipInfo[0].macroBlockOffset = 0;
3611             }
3612         }
3613 
3614     }
3615 
3616     return ret;
3617 }
3618 
3619 /**
3620 ************************************************************************************************************************
3621 *   Gfx10Lib::ComputeSurfaceInfoMacroTiled
3622 *
3623 *   @brief
3624 *       Internal function to calculate alignment for macro tiled surface
3625 *
3626 *   @return
3627 *       ADDR_E_RETURNCODE
3628 ************************************************************************************************************************
3629 */
ComputeSurfaceInfoMacroTiled(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const3630 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
3631      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
3632      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
3633      ) const
3634 {
3635     ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
3636                                                                 &pOut->blockHeight,
3637                                                                 &pOut->blockSlices,
3638                                                                 pIn->bpp,
3639                                                                 pIn->numFrags,
3640                                                                 pIn->resourceType,
3641                                                                 pIn->swizzleMode);
3642 
3643     if (returnCode == ADDR_OK)
3644     {
3645         UINT_32 heightAlign = pOut->blockHeight;
3646 
3647         if (pIn->flags.qbStereo)
3648         {
3649             UINT_32 rightXor = 0;
3650 
3651             returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);
3652 
3653             if (returnCode == ADDR_OK)
3654             {
3655                 pOut->pStereoInfo->rightSwizzle = rightXor;
3656             }
3657         }
3658 
3659         if (returnCode == ADDR_OK)
3660         {
3661             const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
3662             const UINT_32 blockSize     = 1 << blockSizeLog2;
3663 
3664             pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
3665             pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
3666             pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
3667             pOut->baseAlign = blockSize;
3668 
3669             if (pIn->numMipLevels > 1)
3670             {
3671                 const Dim3d  tailMaxDim         = GetMipTailDim(pIn->resourceType,
3672                                                                 pIn->swizzleMode,
3673                                                                 pOut->blockWidth,
3674                                                                 pOut->blockHeight,
3675                                                                 pOut->blockSlices);
3676                 const UINT_32 mip0Width         = pIn->width;
3677                 const UINT_32 mip0Height        = pIn->height;
3678                 const BOOL_32 isThin            = IsThin(pIn->resourceType, pIn->swizzleMode);
3679                 const UINT_32 mip0Depth         = isThin ? 1 : pIn->numSlices;
3680                 const UINT_32 maxMipsInTail     = GetMaxNumMipsInTail(blockSizeLog2, isThin);
3681                 const UINT_32 index             = Log2(pIn->bpp >> 3);
3682                 UINT_32       firstMipInTail    = pIn->numMipLevels;
3683                 UINT_64       mipChainSliceSize = 0;
3684                 UINT_64       mipSize[MaxMipLevels];
3685                 UINT_64       mipSliceSize[MaxMipLevels];
3686 
3687                 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
3688                 Dim3d fixedTailMaxDim = tailMaxDim;
3689 
3690                 if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
3691                 {
3692                     fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
3693                     fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
3694                 }
3695 
3696                 for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
3697                 {
3698                     UINT_32 mipWidth, mipHeight, mipDepth;
3699 
3700                     GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);
3701 
3702                     if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
3703                     {
3704                         firstMipInTail     = i;
3705                         mipChainSliceSize += blockSize / pOut->blockSlices;
3706                         break;
3707                     }
3708                     else
3709                     {
3710                         const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
3711                         const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
3712                         const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
3713                         const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);
3714 
3715                         mipSize[i]         = sliceSize * depth;
3716                         mipSliceSize[i]    = sliceSize * pOut->blockSlices;
3717                         mipChainSliceSize += sliceSize;
3718 
3719                         if (pOut->pMipInfo != NULL)
3720                         {
3721                             pOut->pMipInfo[i].pitch  = pitch;
3722                             pOut->pMipInfo[i].height = height;
3723                             pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3724                         }
3725                     }
3726                 }
3727 
3728                 pOut->sliceSize        = mipChainSliceSize;
3729                 pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
3730                 pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
3731                 pOut->firstMipIdInTail = firstMipInTail;
3732 
3733                 if (pOut->pMipInfo != NULL)
3734                 {
3735                     UINT_64 offset         = 0;
3736                     UINT_64 macroBlkOffset = 0;
3737                     UINT_32 tailMaxDepth   = 0;
3738 
3739                     if (firstMipInTail != pIn->numMipLevels)
3740                     {
3741                         UINT_32 mipWidth, mipHeight;
3742 
3743                         GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
3744                                    &mipWidth, &mipHeight, &tailMaxDepth);
3745 
3746                         offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
3747                         macroBlkOffset = blockSize;
3748                     }
3749 
3750                     for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
3751                     {
3752                         pOut->pMipInfo[i].offset           = offset;
3753                         pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
3754                         pOut->pMipInfo[i].mipTailOffset    = 0;
3755 
3756                         offset         += mipSize[i];
3757                         macroBlkOffset += mipSliceSize[i];
3758                     }
3759 
3760                     UINT_32 pitch  = tailMaxDim.w;
3761                     UINT_32 height = tailMaxDim.h;
3762                     UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);
3763 
3764                     tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);
3765 
3766                     for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
3767                     {
3768                         const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
3769                         const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
3770 
3771                         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
3772                         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
3773                         pOut->pMipInfo[i].macroBlockOffset = 0;
3774 
3775                         pOut->pMipInfo[i].pitch  = pitch;
3776                         pOut->pMipInfo[i].height = height;
3777                         pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3778 
3779                         UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
3780                                        ((mipOffset >> 10) & 2)  |
3781                                        ((mipOffset >> 11) & 4)  |
3782                                        ((mipOffset >> 12) & 8)  |
3783                                        ((mipOffset >> 13) & 16) |
3784                                        ((mipOffset >> 14) & 32);
3785                         UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
3786                                        ((mipOffset >> 9)  & 2)  |
3787                                        ((mipOffset >> 10) & 4)  |
3788                                        ((mipOffset >> 11) & 8)  |
3789                                        ((mipOffset >> 12) & 16) |
3790                                        ((mipOffset >> 13) & 32);
3791 
3792                         if (blockSizeLog2 & 1)
3793                         {
3794                             const UINT_32 temp = mipX;
3795                             mipX = mipY;
3796                             mipY = temp;
3797 
3798                             if (index & 1)
3799                             {
3800                                 mipY = (mipY << 1) | (mipX & 1);
3801                                 mipX = mipX >> 1;
3802                             }
3803                         }
3804 
3805                         if (isThin)
3806                         {
3807                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
3808                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
3809                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3810 
3811                             pitch  = Max(pitch  >> 1, Block256_2d[index].w);
3812                             height = Max(height >> 1, Block256_2d[index].h);
3813                         }
3814                         else
3815                         {
3816                             pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
3817                             pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
3818                             pOut->pMipInfo[i].mipTailCoordZ = 0;
3819 
3820                             pitch  = Max(pitch  >> 1, Block256_3d[index].w);
3821                             height = Max(height >> 1, Block256_3d[index].h);
3822                         }
3823                     }
3824                 }
3825             }
3826             else
3827             {
3828                 pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
3829                 pOut->surfSize  = pOut->sliceSize * pOut->numSlices;
3830 
3831                 if (pOut->pMipInfo != NULL)
3832                 {
3833                     pOut->pMipInfo[0].pitch            = pOut->pitch;
3834                     pOut->pMipInfo[0].height           = pOut->height;
3835                     pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
3836                     pOut->pMipInfo[0].offset           = 0;
3837                     pOut->pMipInfo[0].mipTailOffset    = 0;
3838                     pOut->pMipInfo[0].macroBlockOffset = 0;
3839                     pOut->pMipInfo[0].mipTailCoordX    = 0;
3840                     pOut->pMipInfo[0].mipTailCoordY    = 0;
3841                     pOut->pMipInfo[0].mipTailCoordZ    = 0;
3842                 }
3843             }
3844         }
3845     }
3846 
3847     return returnCode;
3848 }
3849 
3850 /**
3851 ************************************************************************************************************************
3852 *   Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3853 *
3854 *   @brief
3855 *       Internal function to calculate address from coord for tiled swizzle surface
3856 *
3857 *   @return
3858 *       ADDR_E_RETURNCODE
3859 ************************************************************************************************************************
3860 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const3861 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
3862      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
3863      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
3864      ) const
3865 {
3866     ADDR_E_RETURNCODE ret;
3867 
3868     if (IsBlock256b(pIn->swizzleMode))
3869     {
3870         ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);
3871     }
3872     else
3873     {
3874         ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
3875     }
3876 
3877     return ret;
3878 }
3879 
3880 /**
3881 ************************************************************************************************************************
3882 *   Gfx10Lib::ComputeOffsetFromEquation
3883 *
3884 *   @brief
3885 *       Compute offset from equation
3886 *
3887 *   @return
3888 *       Offset
3889 ************************************************************************************************************************
3890 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z) const3891 UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
3892     const ADDR_EQUATION* pEq,   ///< Equation
3893     UINT_32              x,     ///< x coord in bytes
3894     UINT_32              y,     ///< y coord in pixel
3895     UINT_32              z      ///< z coord in slice
3896     ) const
3897 {
3898     UINT_32 offset = 0;
3899 
3900     for (UINT_32 i = 0; i < pEq->numBits; i++)
3901     {
3902         UINT_32 v = 0;
3903 
3904         for (UINT_32 c = 0; c < pEq->numBitComponents; c++)
3905         {
3906             if (pEq->comps[c][i].valid)
3907             {
3908                 if (pEq->comps[c][i].channel == 0)
3909                 {
3910                     v ^= (x >> pEq->comps[c][i].index) & 1;
3911                 }
3912                 else if (pEq->comps[c][i].channel == 1)
3913                 {
3914                     v ^= (y >> pEq->comps[c][i].index) & 1;
3915                 }
3916                 else
3917                 {
3918                     ADDR_ASSERT(pEq->comps[c][i].channel == 2);
3919                     v ^= (z >> pEq->comps[c][i].index) & 1;
3920                 }
3921             }
3922         }
3923 
3924         offset |= (v << i);
3925     }
3926 
3927     return offset;
3928 }
3929 
3930 /**
3931 ************************************************************************************************************************
3932 *   Gfx10Lib::ComputeOffsetFromSwizzlePattern
3933 *
3934 *   @brief
3935 *       Compute offset from swizzle pattern
3936 *
3937 *   @return
3938 *       Offset
3939 ************************************************************************************************************************
3940 */
ComputeOffsetFromSwizzlePattern(const UINT_64 * pPattern,UINT_32 numBits,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const3941 UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
3942     const UINT_64* pPattern,    ///< Swizzle pattern
3943     UINT_32        numBits,     ///< Number of bits in pattern
3944     UINT_32        x,           ///< x coord in pixel
3945     UINT_32        y,           ///< y coord in pixel
3946     UINT_32        z,           ///< z coord in slice
3947     UINT_32        s            ///< sample id
3948     ) const
3949 {
3950     UINT_32                 offset          = 0;
3951     const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
3952 
3953     for (UINT_32 i = 0; i < numBits; i++)
3954     {
3955         UINT_32 v = 0;
3956 
3957         if (pSwizzlePattern[i].x != 0)
3958         {
3959             UINT_16 mask  = pSwizzlePattern[i].x;
3960             UINT_32 xBits = x;
3961 
3962             while (mask != 0)
3963             {
3964                 if (mask & 1)
3965                 {
3966                     v ^= xBits & 1;
3967                 }
3968 
3969                 xBits >>= 1;
3970                 mask  >>= 1;
3971             }
3972         }
3973 
3974         if (pSwizzlePattern[i].y != 0)
3975         {
3976             UINT_16 mask  = pSwizzlePattern[i].y;
3977             UINT_32 yBits = y;
3978 
3979             while (mask != 0)
3980             {
3981                 if (mask & 1)
3982                 {
3983                     v ^= yBits & 1;
3984                 }
3985 
3986                 yBits >>= 1;
3987                 mask  >>= 1;
3988             }
3989         }
3990 
3991         if (pSwizzlePattern[i].z != 0)
3992         {
3993             UINT_16 mask  = pSwizzlePattern[i].z;
3994             UINT_32 zBits = z;
3995 
3996             while (mask != 0)
3997             {
3998                 if (mask & 1)
3999                 {
4000                     v ^= zBits & 1;
4001                 }
4002 
4003                 zBits >>= 1;
4004                 mask  >>= 1;
4005             }
4006         }
4007 
4008         if (pSwizzlePattern[i].s != 0)
4009         {
4010             UINT_16 mask  = pSwizzlePattern[i].s;
4011             UINT_32 sBits = s;
4012 
4013             while (mask != 0)
4014             {
4015                 if (mask & 1)
4016                 {
4017                     v ^= sBits & 1;
4018                 }
4019 
4020                 sBits >>= 1;
4021                 mask  >>= 1;
4022             }
4023         }
4024 
4025         offset |= (v << i);
4026     }
4027 
4028     return offset;
4029 }
4030 
4031 /**
4032 ************************************************************************************************************************
4033 *   Gfx10Lib::GetSwizzlePatternInfo
4034 *
4035 *   @brief
4036 *       Get swizzle pattern
4037 *
4038 *   @return
4039 *       Swizzle pattern information
4040 ************************************************************************************************************************
4041 */
GetSwizzlePatternInfo(AddrSwizzleMode swizzleMode,AddrResourceType resourceType,UINT_32 elemLog2,UINT_32 numFrag) const4042 const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4043     AddrSwizzleMode  swizzleMode,       ///< Swizzle mode
4044     AddrResourceType resourceType,      ///< Resource type
4045     UINT_32          elemLog2,          ///< Element size in bytes log2
4046     UINT_32          numFrag            ///< Number of fragment
4047     ) const
4048 {
    // Now elemLog2 is going to be used to access the correct index inside of the pPatInfo array so we will start from
    // the right location
4051     const UINT_32          index       = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4052     const ADDR_SW_PATINFO* patInfo     = NULL;
4053     const UINT_32          swizzleMask = 1 << swizzleMode;
4054 
4055     if (IsBlockVariable(swizzleMode))
4056     {
4057         if (m_blockVarSizeLog2 != 0)
4058         {
4059             ADDR_ASSERT(m_settings.supportRbPlus);
4060 
4061             if (IsRtOptSwizzle(swizzleMode))
4062             {
4063                 if (numFrag == 1)
4064                 {
4065                     patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4066                 }
4067                 else if (numFrag == 2)
4068                 {
4069                     patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4070                 }
4071                 else if (numFrag == 4)
4072                 {
4073                     patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4074                 }
4075                 else
4076                 {
4077                     ADDR_ASSERT(numFrag == 8);
4078                     patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4079                 }
4080             }
4081             else if (IsZOrderSwizzle(swizzleMode))
4082             {
4083                 if (numFrag == 1)
4084                 {
4085                     patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4086                 }
4087                 else if (numFrag == 2)
4088                 {
4089                     patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4090                 }
4091                 else if (numFrag == 4)
4092                 {
4093                     patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4094                 }
4095                 else
4096                 {
4097                     ADDR_ASSERT(numFrag == 8);
4098                     patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4099                 }
4100             }
4101         }
4102     }
4103     else if (IsLinear(swizzleMode) == FALSE)
4104     {
4105         if (resourceType == ADDR_RSRC_TEX_3D)
4106         {
4107             ADDR_ASSERT(numFrag == 1);
4108 
4109             if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4110             {
4111                 if (IsRtOptSwizzle(swizzleMode))
4112                 {
4113                     if (swizzleMode == ADDR_SW_4KB_R_X)
4114                     {
4115                         patInfo = NULL;
4116                     }
4117                     else
4118                     {
4119                         patInfo = m_settings.supportRbPlus ?
4120                                   GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4121                     }
4122                 }
4123                 else if (IsZOrderSwizzle(swizzleMode))
4124                 {
4125                     patInfo = m_settings.supportRbPlus ?
4126                               GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4127                 }
4128                 else if (IsDisplaySwizzle(resourceType, swizzleMode))
4129                 {
4130                     ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4131                     patInfo = m_settings.supportRbPlus ?
4132                               GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4133                 }
4134                 else
4135                 {
4136                     ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4137 
4138                     if (IsBlock4kb(swizzleMode))
4139                     {
4140                         if (swizzleMode == ADDR_SW_4KB_S)
4141                         {
4142                             patInfo = m_settings.supportRbPlus ?
4143                                       GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4144                         }
4145                         else
4146                         {
4147                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4148                             patInfo = m_settings.supportRbPlus ?
4149                                       GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4150                         }
4151                     }
4152                     else
4153                     {
4154                         if (swizzleMode == ADDR_SW_64KB_S)
4155                         {
4156                             patInfo = m_settings.supportRbPlus ?
4157                                       GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4158                         }
4159                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4160                         {
4161                             patInfo = m_settings.supportRbPlus ?
4162                                       GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4163                         }
4164                         else
4165                         {
4166                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4167                             patInfo = m_settings.supportRbPlus ?
4168                                       GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4169                         }
4170                     }
4171                 }
4172             }
4173         }
4174         else
4175         {
4176             if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4177             {
4178                 if (IsBlock256b(swizzleMode))
4179                 {
4180                     if (swizzleMode == ADDR_SW_256B_S)
4181                     {
4182                         patInfo = m_settings.supportRbPlus ?
4183                                   GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4184                     }
4185                     else
4186                     {
4187                         ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4188                         patInfo = m_settings.supportRbPlus ?
4189                                   GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4190                     }
4191                 }
4192                 else if (IsBlock4kb(swizzleMode))
4193                 {
4194                     if (IsStandardSwizzle(resourceType, swizzleMode))
4195                     {
4196                         if (swizzleMode == ADDR_SW_4KB_S)
4197                         {
4198                             patInfo = m_settings.supportRbPlus ?
4199                                       GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4200                         }
4201                         else
4202                         {
4203                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4204                             patInfo = m_settings.supportRbPlus ?
4205                                       GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4206                         }
4207                     }
4208                     else
4209                     {
4210                         if (swizzleMode == ADDR_SW_4KB_D)
4211                         {
4212                             patInfo = m_settings.supportRbPlus ?
4213                                       GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4214                         }
4215                         else if (swizzleMode == ADDR_SW_4KB_R_X)
4216                         {
4217                             patInfo = NULL;
4218                         }
4219                         else
4220                         {
4221                             ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4222                             patInfo = m_settings.supportRbPlus ?
4223                                       GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4224                         }
4225                     }
4226                 }
4227                 else
4228                 {
4229                     if (IsRtOptSwizzle(swizzleMode))
4230                     {
4231                         if (numFrag == 1)
4232                         {
4233                             patInfo = m_settings.supportRbPlus ?
4234                                       GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4235                         }
4236                         else if (numFrag == 2)
4237                         {
4238                             patInfo = m_settings.supportRbPlus ?
4239                                       GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4240                         }
4241                         else if (numFrag == 4)
4242                         {
4243                             patInfo = m_settings.supportRbPlus ?
4244                                       GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4245                         }
4246                         else
4247                         {
4248                             ADDR_ASSERT(numFrag == 8);
4249                             patInfo = m_settings.supportRbPlus ?
4250                                       GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4251                         }
4252                     }
4253                     else if (IsZOrderSwizzle(swizzleMode))
4254                     {
4255                         if (numFrag == 1)
4256                         {
4257                             patInfo = m_settings.supportRbPlus ?
4258                                       GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4259                         }
4260                         else if (numFrag == 2)
4261                         {
4262                             patInfo = m_settings.supportRbPlus ?
4263                                       GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4264                         }
4265                         else if (numFrag == 4)
4266                         {
4267                             patInfo = m_settings.supportRbPlus ?
4268                                       GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4269                         }
4270                         else
4271                         {
4272                             ADDR_ASSERT(numFrag == 8);
4273                             patInfo = m_settings.supportRbPlus ?
4274                                       GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4275                         }
4276                     }
4277                     else if (IsDisplaySwizzle(resourceType, swizzleMode))
4278                     {
4279                         if (swizzleMode == ADDR_SW_64KB_D)
4280                         {
4281                             patInfo = m_settings.supportRbPlus ?
4282                                       GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4283                         }
4284                         else if (swizzleMode == ADDR_SW_64KB_D_X)
4285                         {
4286                             patInfo = m_settings.supportRbPlus ?
4287                                       GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4288                         }
4289                         else
4290                         {
4291                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4292                             patInfo = m_settings.supportRbPlus ?
4293                                       GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4294                         }
4295                     }
4296                     else
4297                     {
4298                         if (swizzleMode == ADDR_SW_64KB_S)
4299                         {
4300                             patInfo = m_settings.supportRbPlus ?
4301                                       GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4302                         }
4303                         else if (swizzleMode == ADDR_SW_64KB_S_X)
4304                         {
4305                             patInfo = m_settings.supportRbPlus ?
4306                                       GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4307                         }
4308                         else
4309                         {
4310                             ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4311                             patInfo = m_settings.supportRbPlus ?
4312                                       GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4313                         }
4314                     }
4315                 }
4316             }
4317         }
4318     }
4319 
4320     return (patInfo != NULL) ? &patInfo[index] : NULL;
4321 }
4322 
4323 /**
4324 ************************************************************************************************************************
4325 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4326 *
4327 *   @brief
4328 *       Internal function to calculate address from coord for micro tiled swizzle surface
4329 *
4330 *   @return
4331 *       ADDR_E_RETURNCODE
4332 ************************************************************************************************************************
4333 */
ComputeSurfaceAddrFromCoordMicroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4334 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4335      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4336      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4337      ) const
4338 {
4339     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4340     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4341     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4342     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4343 
4344     localIn.swizzleMode  = pIn->swizzleMode;
4345     localIn.flags        = pIn->flags;
4346     localIn.resourceType = pIn->resourceType;
4347     localIn.bpp          = pIn->bpp;
4348     localIn.width        = Max(pIn->unalignedWidth,  1u);
4349     localIn.height       = Max(pIn->unalignedHeight, 1u);
4350     localIn.numSlices    = Max(pIn->numSlices,       1u);
4351     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4352     localIn.numSamples   = Max(pIn->numSamples,      1u);
4353     localIn.numFrags     = Max(pIn->numFrags,        1u);
4354     localOut.pMipInfo    = mipInfo;
4355 
4356     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4357 
4358     if (ret == ADDR_OK)
4359     {
4360         const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4361         const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4362         const UINT_32 swMode   = static_cast<UINT_32>(pIn->swizzleMode);
4363         const UINT_32 eqIndex  = m_equationLookupTable[rsrcType][swMode][elemLog2];
4364 
4365         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4366         {
4367             const UINT_32 pb           = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4368             const UINT_32 yb           = pIn->y / localOut.blockHeight;
4369             const UINT_32 xb           = pIn->x / localOut.blockWidth;
4370             const UINT_32 blockIndex   = yb * pb + xb;
4371             const UINT_32 blockSize    = 256;
4372             const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4373                                                                    pIn->x << elemLog2,
4374                                                                    pIn->y,
4375                                                                    0);
4376             pOut->addr = localOut.sliceSize * pIn->slice +
4377                          mipInfo[pIn->mipId].macroBlockOffset +
4378                          (blockIndex * blockSize) +
4379                          blk256Offset;
4380         }
4381         else
4382         {
4383             ret = ADDR_INVALIDPARAMS;
4384         }
4385     }
4386 
4387     return ret;
4388 }
4389 
4390 /**
4391 ************************************************************************************************************************
4392 *   Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4393 *
4394 *   @brief
4395 *       Internal function to calculate address from coord for macro tiled swizzle surface
4396 *
4397 *   @return
4398 *       ADDR_E_RETURNCODE
4399 ************************************************************************************************************************
4400 */
ComputeSurfaceAddrFromCoordMacroTiled(const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const4401 ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4402      const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
4403      ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
4404      ) const
4405 {
4406     ADDR2_COMPUTE_SURFACE_INFO_INPUT  localIn  = {};
4407     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4408     ADDR2_MIP_INFO                    mipInfo[MaxMipLevels];
4409     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
4410 
4411     localIn.swizzleMode  = pIn->swizzleMode;
4412     localIn.flags        = pIn->flags;
4413     localIn.resourceType = pIn->resourceType;
4414     localIn.bpp          = pIn->bpp;
4415     localIn.width        = Max(pIn->unalignedWidth,  1u);
4416     localIn.height       = Max(pIn->unalignedHeight, 1u);
4417     localIn.numSlices    = Max(pIn->numSlices,       1u);
4418     localIn.numMipLevels = Max(pIn->numMipLevels,    1u);
4419     localIn.numSamples   = Max(pIn->numSamples,      1u);
4420     localIn.numFrags     = Max(pIn->numFrags,        1u);
4421     localOut.pMipInfo    = mipInfo;
4422 
4423     ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4424 
4425     if (ret == ADDR_OK)
4426     {
4427         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
4428         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4429         const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
4430         const UINT_32 pipeMask    = (1 << m_pipesLog2) - 1;
4431         const UINT_32 bankMask    = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4432         const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4433                                     (((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4434 
4435         if (localIn.numFrags > 1)
4436         {
4437             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4438                                                                     pIn->resourceType,
4439                                                                     elemLog2,
4440                                                                     localIn.numFrags);
4441 
4442             if (pPatInfo != NULL)
4443             {
4444                 const UINT_32 pb        = localOut.pitch / localOut.blockWidth;
4445                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4446                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4447                 const UINT_64 blkIdx    = yb * pb + xb;
4448 
4449                 ADDR_BIT_SETTING fullSwizzlePattern[20];
4450                 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4451 
4452                 const UINT_32 blkOffset =
4453                     ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4454                                                     blkSizeLog2,
4455                                                     pIn->x,
4456                                                     pIn->y,
4457                                                     pIn->slice,
4458                                                     pIn->sample);
4459 
4460                 pOut->addr = (localOut.sliceSize * pIn->slice) +
4461                              (blkIdx << blkSizeLog2) +
4462                              (blkOffset ^ pipeBankXor);
4463             }
4464             else
4465             {
4466                 ret = ADDR_INVALIDPARAMS;
4467             }
4468         }
4469         else
4470         {
4471             const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4472             const UINT_32 swMode  = static_cast<UINT_32>(pIn->swizzleMode);
4473             const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4474 
4475             if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4476             {
4477                 const BOOL_32 inTail    = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4478                 const BOOL_32 isThin    = IsThin(pIn->resourceType, pIn->swizzleMode);
4479                 const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4480                 const UINT_32 sliceId   = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4481                 const UINT_32 x         = inTail ? (pIn->x     + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4482                 const UINT_32 y         = inTail ? (pIn->y     + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4483                 const UINT_32 z         = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4484                 const UINT_32 pb        = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4485                 const UINT_32 yb        = pIn->y / localOut.blockHeight;
4486                 const UINT_32 xb        = pIn->x / localOut.blockWidth;
4487                 const UINT_64 blkIdx    = yb * pb + xb;
4488                 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4489                                                                     x << elemLog2,
4490                                                                     y,
4491                                                                     z);
4492                 pOut->addr = sliceSize * sliceId +
4493                              mipInfo[pIn->mipId].macroBlockOffset +
4494                              (blkIdx << blkSizeLog2) +
4495                              (blkOffset ^ pipeBankXor);
4496             }
4497             else
4498             {
4499                 ret = ADDR_INVALIDPARAMS;
4500             }
4501         }
4502     }
4503 
4504     return ret;
4505 }
4506 
4507 /**
4508 ************************************************************************************************************************
4509 *   Gfx10Lib::HwlComputeMaxBaseAlignments
4510 *
4511 *   @brief
4512 *       Gets maximum alignments
4513 *   @return
4514 *       maximum alignments
4515 ************************************************************************************************************************
4516 */
HwlComputeMaxBaseAlignments() const4517 UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4518 {
4519     return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4520 }
4521 
4522 /**
4523 ************************************************************************************************************************
4524 *   Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4525 *
4526 *   @brief
4527 *       Gets maximum alignments for metadata
4528 *   @return
4529 *       maximum alignments for metadata
4530 ************************************************************************************************************************
4531 */
HwlComputeMaxMetaBaseAlignments() const4532 UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4533 {
4534     Dim3d metaBlk;
4535 
4536     const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4537     {
4538         ADDR_SW_64KB_Z_X,
4539         m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4540     };
4541 
4542     UINT_32 maxBaseAlignHtile = 0;
4543     UINT_32 maxBaseAlignCmask = 0;
4544 
4545     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4546     {
4547         for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4548         {
4549             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4550             {
4551                 // Max base alignment for Htile
4552                 const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4553                                                                 ADDR_RSRC_TEX_2D,
4554                                                                 ValidSwizzleModeForXmask[swIdx],
4555                                                                 bppLog2,
4556                                                                 numFragLog2,
4557                                                                 TRUE,
4558                                                                 &metaBlk);
4559 
4560                 maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4561             }
4562         }
4563 
4564         // Max base alignment for Cmask
4565         const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4566                                                         ADDR_RSRC_TEX_2D,
4567                                                         ValidSwizzleModeForXmask[swIdx],
4568                                                         0,
4569                                                         0,
4570                                                         TRUE,
4571                                                         &metaBlk);
4572 
4573         maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4574     }
4575 
4576     // Max base alignment for 2D Dcc
4577     const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4578     {
4579         ADDR_SW_64KB_S_X,
4580         ADDR_SW_64KB_D_X,
4581         ADDR_SW_64KB_R_X,
4582         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4583     };
4584 
4585     UINT_32 maxBaseAlignDcc2D = 0;
4586 
4587     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4588     {
4589         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4590         {
4591             for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4592             {
4593                 const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4594                                                              ADDR_RSRC_TEX_2D,
4595                                                              ValidSwizzleModeForDcc2D[swIdx],
4596                                                              bppLog2,
4597                                                              numFragLog2,
4598                                                              TRUE,
4599                                                              &metaBlk);
4600 
4601                 maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4602             }
4603         }
4604     }
4605 
4606     // Max base alignment for 3D Dcc
4607     const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4608     {
4609         ADDR_SW_64KB_Z_X,
4610         ADDR_SW_64KB_S_X,
4611         ADDR_SW_64KB_D_X,
4612         ADDR_SW_64KB_R_X,
4613         m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4614     };
4615 
4616     UINT_32 maxBaseAlignDcc3D = 0;
4617 
4618     for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4619     {
4620         for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4621         {
4622             const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4623                                                          ADDR_RSRC_TEX_3D,
4624                                                          ValidSwizzleModeForDcc3D[swIdx],
4625                                                          bppLog2,
4626                                                          0,
4627                                                          TRUE,
4628                                                          &metaBlk);
4629 
4630             maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4631         }
4632     }
4633 
4634     return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4635 }
4636 
4637 /**
4638 ************************************************************************************************************************
4639 *   Gfx10Lib::GetMetaElementSizeLog2
4640 *
4641 *   @brief
4642 *       Gets meta data element size log2
4643 *   @return
4644 *       Meta data element size log2
4645 ************************************************************************************************************************
4646 */
GetMetaElementSizeLog2(Gfx10DataType dataType)4647 INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4648     Gfx10DataType dataType) ///< Data surface type
4649 {
4650     INT_32 elemSizeLog2 = 0;
4651 
4652     if (dataType == Gfx10DataColor)
4653     {
4654         elemSizeLog2 = 0;
4655     }
4656     else if (dataType == Gfx10DataDepthStencil)
4657     {
4658         elemSizeLog2 = 2;
4659     }
4660     else
4661     {
4662         ADDR_ASSERT(dataType == Gfx10DataFmask);
4663         elemSizeLog2 = -1;
4664     }
4665 
4666     return elemSizeLog2;
4667 }
4668 
4669 /**
4670 ************************************************************************************************************************
4671 *   Gfx10Lib::GetMetaCacheSizeLog2
4672 *
4673 *   @brief
4674 *       Gets meta data cache line size log2
4675 *   @return
4676 *       Meta data cache line size log2
4677 ************************************************************************************************************************
4678 */
GetMetaCacheSizeLog2(Gfx10DataType dataType)4679 INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4680     Gfx10DataType dataType) ///< Data surface type
4681 {
4682     INT_32 cacheSizeLog2 = 0;
4683 
4684     if (dataType == Gfx10DataColor)
4685     {
4686         cacheSizeLog2 = 6;
4687     }
4688     else if (dataType == Gfx10DataDepthStencil)
4689     {
4690         cacheSizeLog2 = 8;
4691     }
4692     else
4693     {
4694         ADDR_ASSERT(dataType == Gfx10DataFmask);
4695         cacheSizeLog2 = 8;
4696     }
4697     return cacheSizeLog2;
4698 }
4699 
4700 /**
4701 ************************************************************************************************************************
4702 *   Gfx10Lib::HwlComputeSurfaceInfoLinear
4703 *
4704 *   @brief
4705 *       Internal function to calculate alignment for linear surface
4706 *
4707 *   @return
4708 *       ADDR_E_RETURNCODE
4709 ************************************************************************************************************************
4710 */
HwlComputeSurfaceInfoLinear(const ADDR2_COMPUTE_SURFACE_INFO_INPUT * pIn,ADDR2_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const4711 ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4712      const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
4713      ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
4714      ) const
4715 {
4716     ADDR_E_RETURNCODE returnCode = ADDR_OK;
4717 
4718     if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4719     {
4720         returnCode = ADDR_INVALIDPARAMS;
4721     }
4722     else
4723     {
4724         const UINT_32 elementBytes = pIn->bpp >> 3;
4725         const UINT_32 pitchAlign   = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4726         const UINT_32 mipDepth     = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4727         UINT_32       pitch        = PowTwoAlign(pIn->width, pitchAlign);
4728         UINT_32       actualHeight = pIn->height;
4729         UINT_64       sliceSize    = 0;
4730 
4731         if (pIn->numMipLevels > 1)
4732         {
4733             for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4734             {
4735                 UINT_32 mipWidth, mipHeight;
4736 
4737                 GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4738 
4739                 const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4740 
4741                 if (pOut->pMipInfo != NULL)
4742                 {
4743                     pOut->pMipInfo[i].pitch            = mipActualWidth;
4744                     pOut->pMipInfo[i].height           = mipHeight;
4745                     pOut->pMipInfo[i].depth            = mipDepth;
4746                     pOut->pMipInfo[i].offset           = sliceSize;
4747                     pOut->pMipInfo[i].mipTailOffset    = 0;
4748                     pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4749                 }
4750 
4751                 sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4752             }
4753         }
4754         else
4755         {
4756             returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4757 
4758             if (returnCode == ADDR_OK)
4759             {
4760                 sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4761 
4762                 if (pOut->pMipInfo != NULL)
4763                 {
4764                     pOut->pMipInfo[0].pitch            = pitch;
4765                     pOut->pMipInfo[0].height           = actualHeight;
4766                     pOut->pMipInfo[0].depth            = mipDepth;
4767                     pOut->pMipInfo[0].offset           = 0;
4768                     pOut->pMipInfo[0].mipTailOffset    = 0;
4769                     pOut->pMipInfo[0].macroBlockOffset = 0;
4770                 }
4771             }
4772         }
4773 
4774         if (returnCode == ADDR_OK)
4775         {
4776             pOut->pitch          = pitch;
4777             pOut->height         = actualHeight;
4778             pOut->numSlices      = pIn->numSlices;
4779             pOut->sliceSize      = sliceSize;
4780             pOut->surfSize       = sliceSize * pOut->numSlices;
4781             pOut->baseAlign      = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4782             pOut->blockWidth     = pitchAlign;
4783             pOut->blockHeight    = 1;
4784             pOut->blockSlices    = 1;
4785 
4786             // Following members are useless on GFX10
4787             pOut->mipChainPitch  = 0;
4788             pOut->mipChainHeight = 0;
4789             pOut->mipChainSlice  = 0;
4790             pOut->epitchIsHeight = FALSE;
4791 
4792             // Post calculation validate
4793             ADDR_ASSERT(pOut->sliceSize > 0);
4794         }
4795     }
4796 
4797     return returnCode;
4798 }
4799 
4800 } // V2
4801 } // Addr
4802