xref: /aosp_15_r20/external/mesa3d/src/amd/addrlib/src/gfx12/gfx12addrlib.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2 ************************************************************************************************************************
3 *
4 *  Copyright (C) 2023 Advanced Micro Devices, Inc.  All rights reserved.
5 *  SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8 
9 /**
10 ************************************************************************************************************************
11 * @file  gfx12addrlib.cpp
12 * @brief Contains the implementation of the Gfx12Lib class.
13 ************************************************************************************************************************
14 */
15 
16 #include "gfx12addrlib.h"
17 #include "gfx12_gb_reg.h"
18 
19 #include "amdgpu_asic_addr.h"
20 
21 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23 
24 namespace Addr
25 {
26 /**
27 ************************************************************************************************************************
28 *   Gfx12HwlInit
29 *
30 *   @brief
31 *       Creates an Gfx12Lib object.
32 *
33 *   @return
34 *       Returns an Gfx12Lib object pointer.
35 ************************************************************************************************************************
36 */
Gfx12HwlInit(const Client * pClient)37 Addr::Lib* Gfx12HwlInit(
38     const Client* pClient)
39 {
40     return V3::Gfx12Lib::CreateObj(pClient);
41 }
42 
43 namespace V3
44 {
45 
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 //                               Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] =
50 {//Linear 2d   3d  256B  4KB  64KB  256KB  Reserved
51     {{1,   0,   0,    0,   0,    0,     0,    0}}, // ADDR3_LINEAR
52     {{0,   1,   0,    1,   0,    0,     0,    0}}, // ADDR3_256B_2D
53     {{0,   1,   0,    0,   1,    0,     0,    0}}, // ADDR3_4KB_2D
54     {{0,   1,   0,    0,   0,    1,     0,    0}}, // ADDR3_64KB_2D
55     {{0,   1,   0,    0,   0,    0,     1,    0}}, // ADDR3_256KB_2D
56     {{0,   0,   1,    0,   1,    0,     0,    0}}, // ADDR3_4KB_3D
57     {{0,   0,   1,    0,   0,    1,     0,    0}}, // ADDR3_64KB_3D
58     {{0,   0,   1,    0,   0,    0,     1,    0}}, // ADDR3_256KB_3D
59 };
60 
61 /**
62 ************************************************************************************************************************
63 *   Gfx12Lib::Gfx12Lib
64 *
65 *   @brief
66 *       Constructor
67 *
68 ************************************************************************************************************************
69 */
Gfx12Lib(const Client * pClient)70 Gfx12Lib::Gfx12Lib(
71     const Client* pClient)
72     :
73     Lib(pClient),
74     m_numSwizzleBits(0)
75 {
76     memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
77 }
78 
79 /**
80 ************************************************************************************************************************
81 *   Gfx12Lib::~Gfx12Lib
82 *
83 *   @brief
84 *       Destructor
85 ************************************************************************************************************************
86 */
~Gfx12Lib()87 Gfx12Lib::~Gfx12Lib()
88 {
89 }
90 
91 /**
92 ************************************************************************************************************************
93 *   Gfx12Lib::ConvertSwizzlePatternToEquation
94 *
95 *   @brief
96 *       Convert swizzle pattern to equation.
97 *
98 *   @return
99 *       N/A
100 ************************************************************************************************************************
101 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,Addr3SwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const102 VOID Gfx12Lib::ConvertSwizzlePatternToEquation(
103     UINT_32                elemLog2,  ///< [in] element bytes log2
104     Addr3SwizzleMode       swMode,    ///< [in] swizzle mode
105     const ADDR_SW_PATINFO* pPatInfo,  ///< [in] swizzle pattern info
106     ADDR_EQUATION*         pEquation) ///< [out] equation converted from swizzle pattern
107     const
108 {
109     ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K];
110     GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
111 
112     const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
113     const UINT_32           blockSizeLog2 = GetBlockSizeLog2(swMode, TRUE);
114 
115     pEquation->numBits = blockSizeLog2;
116     pEquation->stackedDepthSlices = FALSE;
117 
118     for (UINT_32 i = 0; i < elemLog2; i++)
119     {
120         pEquation->addr[i].channel = 0;
121         pEquation->addr[i].valid = 1;
122         pEquation->addr[i].index = i;
123     }
124 
125     for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
126     {
127         ADDR_ASSERT(IsPow2(pSwizzle[i].value));
128 
129         if (pSwizzle[i].x != 0)
130         {
131             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
132 
133             pEquation->addr[i].channel = 0;
134             pEquation->addr[i].valid = 1;
135             pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
136         }
137         else if (pSwizzle[i].y != 0)
138         {
139             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
140 
141             pEquation->addr[i].channel = 1;
142             pEquation->addr[i].valid = 1;
143             pEquation->addr[i].index = Log2(pSwizzle[i].y);
144         }
145         else if (pSwizzle[i].z != 0)
146         {
147             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
148 
149             pEquation->addr[i].channel = 2;
150             pEquation->addr[i].valid = 1;
151             pEquation->addr[i].index = Log2(pSwizzle[i].z);
152         }
153         else if (pSwizzle[i].s != 0)
154         {
155             ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].s)));
156 
157             pEquation->addr[i].channel = 3;
158             pEquation->addr[i].valid = 1;
159             pEquation->addr[i].index = Log2(pSwizzle[i].s);
160         }
161         else
162         {
163             ADDR_ASSERT_ALWAYS();
164         }
165     }
166 }
167 
168 /**
169 ************************************************************************************************************************
170 *   Gfx12Lib::InitEquationTable
171 *
172 *   @brief
173 *       Initialize Equation table.
174 *
175 *   @return
176 *       N/A
177 ************************************************************************************************************************
178 */
InitEquationTable()179 VOID Gfx12Lib::InitEquationTable()
180 {
181     memset(m_equationTable, 0, sizeof(m_equationTable));
182 
183     for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
184     {
185         const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
186 
187         // Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits)
188         if (IsValidSwMode(swMode) && (IsLinear(swMode) == false))
189         {
190             const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
191 
192             for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
193             {
194                 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
195                 {
196                     UINT_32                equationIndex = ADDR_INVALID_EQUATION_INDEX;
197                     const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, elemLog2, 1 << msaaIdx);
198 
199                     if (pPatInfo != NULL)
200                     {
201                         ADDR_EQUATION equation = {};
202 
203                         ConvertSwizzlePatternToEquation(elemLog2, swMode, pPatInfo, &equation);
204 
205                         equationIndex = m_numEquations;
206                         ADDR_ASSERT(equationIndex < NumSwizzlePatterns);
207 
208                         m_equationTable[equationIndex] = equation;
209                         m_numEquations++;
210                     }
211                     SetEquationTableEntry(swMode, msaaIdx, elemLog2, equationIndex);
212                 } // loop through bpp sizes
213             } // loop through MSAA rates
214         } // End check for valid non-linear modes
215     } // loop through swizzle modes
216 }
217 
218 /**
219 ************************************************************************************************************************
220 *   Gfx12Lib::HwlGetEquationIndex
221 *
222 *   @brief
223 *       Return equationIndex by surface info input
224 *
225 *   @return
226 *       equationIndex
227 ************************************************************************************************************************
228 */
HwlGetEquationIndex(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const229 UINT_32 Gfx12Lib::HwlGetEquationIndex(
230     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn    ///< [in] input structure
231     ) const
232 {
233     UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
234 
235     if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
236         (pIn->resourceType == ADDR_RSRC_TEX_3D))
237     {
238         equationIdx = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), Log2(pIn->bpp >> 3));
239     }
240 
241     return equationIdx;
242 }
243 
244 /**
245 ************************************************************************************************************************
246 *   Gfx12Lib::InitBlockDimensionTable
247 *
248 *   @brief
249 *       Initialize block dimension table for all swizzle modes + msaa samples + bpp bundles.
250 *
251 *   @return
252 *       N/A
253 ************************************************************************************************************************
254 */
InitBlockDimensionTable()255 VOID Gfx12Lib::InitBlockDimensionTable()
256 {
257     memset(m_blockDimensionTable, 0, sizeof(m_blockDimensionTable));
258 
259     ADDR3_COMPUTE_SURFACE_INFO_INPUT surfaceInfo {};
260 
261 
262     for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
263     {
264         const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
265 
266         if (IsValidSwMode(swMode))
267         {
268             surfaceInfo.swizzleMode = swMode;
269             const UINT_32 maxMsaa   = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
270 
271             for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
272             {
273                 surfaceInfo.numSamples = (1u << msaaIdx);
274                 for (UINT_32 elementBytesLog2 = 0; elementBytesLog2 < MaxElementBytesLog2; elementBytesLog2++)
275                 {
276                     surfaceInfo.bpp = (1u << (elementBytesLog2 + 3));
277                     ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &surfaceInfo };
278                     ComputeBlockDimensionForSurf(&input, &m_blockDimensionTable[swModeIdx][msaaIdx][elementBytesLog2]);
279                 } // end loop through bpp sizes
280             } // end loop through MSAA rates
281         } // end check for valid swizzle modes
282     } // end loop through swizzle modes
283 }
284 
285 /**
286 ************************************************************************************************************************
287 *   Gfx12Lib::GetMipOrigin
288 *
289 *   @brief
290 *       Internal function to calculate origins of the mip levels
291 *
292 *   @return
293 *       ADDR_E_RETURNCODE
294 ************************************************************************************************************************
295 */
GetMipOrigin(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & mipExtentFirstInTail,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const296 VOID Gfx12Lib::GetMipOrigin(
297      const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,        ///< [in] input structure
298      const ADDR_EXTENT3D&                           mipExtentFirstInTail,
299      ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*             pOut        ///< [out] output structure
300      ) const
301 {
302     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
303     const BOOL_32        is3d             = (pSurfInfo->resourceType == ADDR_RSRC_TEX_3D);
304     const UINT_32        bytesPerPixel    = pSurfInfo->bpp >> 3;
305     const UINT_32        elementBytesLog2 = Log2(bytesPerPixel);
306     const UINT_32        samplesLog2      = Log2(pSurfInfo->numSamples);
307 
308     // Calculate the width/height/depth for the given microblock, because the mip offset calculation
309     // is in units of microblocks but we want it in elements.
310     ADDR_EXTENT3D        microBlockExtent = HwlGetMicroBlockSize(pIn);
311     const ADDR_EXTENT3D  tailMaxDim       = GetMipTailDim(pIn, pOut->blockExtent);
312     const UINT_32        blockSizeLog2    = GetBlockSizeLog2(pSurfInfo->swizzleMode);
313 
314     UINT_32 pitch  = tailMaxDim.width;
315     UINT_32 height = tailMaxDim.height;
316     UINT_32 depth  = (is3d ? PowTwoAlign(mipExtentFirstInTail.depth, microBlockExtent.depth) : 1);
317 
318     const UINT_32 tailMaxDepth   = (is3d ? (depth / microBlockExtent.depth) : 1);
319 
320     for (UINT_32 i = pOut->firstMipIdInTail; i < pSurfInfo->numMipLevels; i++)
321     {
322         const INT_32  mipInTail = CalcMipInTail(pIn, pOut, i);
323         const UINT_32 mipOffset = CalcMipOffset(pIn, mipInTail);
324 
325         pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
326         pOut->pMipInfo[i].mipTailOffset    = mipOffset;
327         pOut->pMipInfo[i].macroBlockOffset = 0;
328 
329         pOut->pMipInfo[i].pitch  = pitch;
330         pOut->pMipInfo[i].height = height;
331         pOut->pMipInfo[i].depth  = depth;
332         if (IsLinear(pSurfInfo->swizzleMode))
333         {
334             pOut->pMipInfo[i].mipTailCoordX = mipOffset >> 8;
335             pOut->pMipInfo[i].mipTailCoordY = 0;
336             pOut->pMipInfo[i].mipTailCoordZ = 0;
337         }
338         else
339         {
340             UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
341                            ((mipOffset >> 10) & 2)  |
342                            ((mipOffset >> 11) & 4)  |
343                            ((mipOffset >> 12) & 8)  |
344                            ((mipOffset >> 13) & 16) |
345                            ((mipOffset >> 14) & 32);
346             UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
347                            ((mipOffset >> 9)  & 2)  |
348                            ((mipOffset >> 10) & 4)  |
349                            ((mipOffset >> 11) & 8)  |
350                            ((mipOffset >> 12) & 16) |
351                            ((mipOffset >> 13) & 32);
352 
353             pOut->pMipInfo[i].mipTailCoordX = mipX * microBlockExtent.width;
354             pOut->pMipInfo[i].mipTailCoordY = mipY * microBlockExtent.height;
355             pOut->pMipInfo[i].mipTailCoordZ = 0;
356         }
357         if (IsLinear(pSurfInfo->swizzleMode))
358         {
359             pitch = Max(pitch >> 1, 1u);
360         }
361         else
362         {
363             pOut->pMipInfo[i].pitch  = PowTwoAlign(pitch,  microBlockExtent.width);
364             pOut->pMipInfo[i].height = PowTwoAlign(height, microBlockExtent.height);
365             pOut->pMipInfo[i].depth  = PowTwoAlign(depth,  microBlockExtent.depth);
366             pitch  = Max(pitch >> 1,  1u);
367             height = Max(height >> 1, 1u);
368             depth  = Max(depth >> 1,  1u);
369         }
370     }
371 }
372 
373 /**
374 ************************************************************************************************************************
375 *   Gfx12Lib::GetMipOffset
376 *
377 *   @brief
378 *       Internal function to calculate alignment for a surface
379 *
380 *   @return
381 *       ADDR_E_RETURNCODE
382 ************************************************************************************************************************
383 */
GetMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const384 VOID Gfx12Lib::GetMipOffset(
385      const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,    ///< [in] input structure
386      ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*             pOut    ///< [out] output structure
387      ) const
388 {
389     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
390     const UINT_32        bytesPerPixel    = pSurfInfo->bpp >> 3;
391     const UINT_32        elementBytesLog2 = Log2(bytesPerPixel);
392     const UINT_32        blockSizeLog2    = GetBlockSizeLog2(pSurfInfo->swizzleMode);
393     const UINT_32        blockSize        = 1 << blockSizeLog2;
394     const ADDR_EXTENT3D  tailMaxDim       = GetMipTailDim(pIn, pOut->blockExtent);;
395     const ADDR_EXTENT3D  mip0Dims         = GetBaseMipExtents(pSurfInfo);
396     const UINT_32        maxMipsInTail    = GetMaxNumMipsInTail(pIn);
397     const bool           isLinear         = IsLinear(pSurfInfo->swizzleMode);
398 
399     UINT_32 firstMipInTail    = pSurfInfo->numMipLevels;
400     UINT_64 mipChainSliceSize = 0;
401     UINT_64 mipChainSliceSizeDense  = 0;
402     UINT_64 mipSize[MaxMipLevels];
403     UINT_64 mipSliceSize[MaxMipLevels];
404 
405     const BOOL_32 useCustomPitch    = UseCustomPitch(pSurfInfo);
406     for (UINT_32 mipIdx = 0; mipIdx < pSurfInfo->numMipLevels; mipIdx++)
407     {
408         const ADDR_EXTENT3D  mipExtents = GetMipExtent(mip0Dims, mipIdx);
409 
410         if (Lib::SupportsMipTail(pSurfInfo->swizzleMode) &&
411             (pSurfInfo->numMipLevels > 1)                &&
412             IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
413         {
414             firstMipInTail          = mipIdx;
415             mipChainSliceSize      += blockSize / pOut->blockExtent.depth;
416             mipChainSliceSizeDense += blockSize / pOut->blockExtent.depth;
417             break;
418         }
419         else
420         {
421             UINT_32 pitchImgData   = 0u;
422             UINT_32 pitchSliceSize = 0u;
423             if (isLinear)
424             {
425                 // The slice size of a linear image is calculated as if the "pitch" is 256 byte aligned.
426                 // However, the rendering pitch is aligned to 128 bytes, and that is what needs to be reported
427                 // to our clients in the normal 'pitch' field.
428                 // Note this is NOT the same as the total size of the image being aligned to 256 bytes!
429                 pitchImgData   = (useCustomPitch ? pOut->pitch : PowTwoAlign(mipExtents.width, 128u / bytesPerPixel));
430                 pitchSliceSize = PowTwoAlign(pitchImgData, blockSize / bytesPerPixel);
431             }
432             else
433             {
434                 pitchImgData   = PowTwoAlign(mipExtents.width, pOut->blockExtent.width);
435                 pitchSliceSize = pitchImgData;
436             }
437 
438             UINT_32 height = UseCustomHeight(pSurfInfo)
439                                         ? pOut->height
440                                         : PowTwoAlign(mipExtents.height, pOut->blockExtent.height);
441             const UINT_32 depth  = PowTwoAlign(mipExtents.depth, pOut->blockExtent.depth);
442 
443             if (isLinear && pSurfInfo->flags.denseSliceExact && ((pitchImgData % blockSize) != 0))
444             {
445                 // If we want size to exactly equal (data)pitch * height, make sure that value is 256B aligned.
446                 // Essentially, if the pitch is less aligned, ensure the height is padded so total alignment is 256B.
447                 ADDR_ASSERT((blockSize % 128) == 0);
448                 height = PowTwoAlign(height, blockSize / 128u);
449             }
450 
451             // The original "blockExtent" calculation does subtraction of logs (i.e., division) to get the
452             // sizes.  We aligned our pitch and height to those sizes, which means we need to multiply the various
453             // factors back together to get back to the slice size.
454             UINT_64 sizeExceptPitch = static_cast<UINT_64>(height) * pSurfInfo->numSamples * (pSurfInfo->bpp >> 3);
455             UINT_64 sliceSize       = static_cast<UINT_64>(pitchSliceSize) * sizeExceptPitch;
456             UINT_64 sliceDataSize   = PowTwoAlign(static_cast<UINT_64>(pitchImgData) * sizeExceptPitch,
457                                                   static_cast<UINT_64>(blockSize));
458 
459             UINT_64 hwSliceSize     = sliceSize * pOut->blockExtent.depth;
460             ADDR_ASSERT(PowTwoAlign(hwSliceSize, static_cast<UINT_64>(blockSize)) == hwSliceSize);
461 
462             if ((mipIdx == 0) && CanTrimLinearPadding(pSurfInfo))
463             {
464                 // When this is the last linear subresource of the whole image (as laid out in memory), then we don't
465                 // need to worry about the real slice size and can reduce it to the end of the image data (or some
466                 // inflated value to meet a custom depth pitch)
467                 pitchSliceSize = pitchImgData;
468                 if (UseCustomHeight(pSurfInfo))
469                 {
470                     sliceSize = pSurfInfo->sliceAlign;
471                 }
472                 else
473                 {
474                     sliceSize = sliceDataSize;
475                 }
476                 // CanTrimLinearPadding is always false for 3D swizzles, so block depth is always 1.
477                 hwSliceSize = sliceSize;
478             }
479 
480             mipSize[mipIdx]         = sliceSize * depth;
481             mipSliceSize[mipIdx]    = hwSliceSize;
482             mipChainSliceSize      += sliceSize;
483             mipChainSliceSizeDense += (mipIdx == 0) ? sliceDataSize : sliceSize;
484 
485             if (pOut->pMipInfo != NULL)
486             {
487                 pOut->pMipInfo[mipIdx].pitch         = pitchImgData;
488                 pOut->pMipInfo[mipIdx].pitchForSlice = pitchSliceSize;
489                 pOut->pMipInfo[mipIdx].height        = height;
490                 pOut->pMipInfo[mipIdx].depth         = depth;
491             }
492         }
493     }
494 
495     pOut->sliceSize            = mipChainSliceSize;
496     pOut->sliceSizeDensePacked = mipChainSliceSizeDense;
497     pOut->surfSize             = mipChainSliceSize * pOut->numSlices;
498     pOut->mipChainInTail       = (firstMipInTail == 0) ? TRUE : FALSE;
499     pOut->firstMipIdInTail     = firstMipInTail;
500 
501     if (pOut->pMipInfo != NULL)
502     {
503         if (isLinear)
504         {
505             // 1. Linear swizzle mode doesn't have miptails.
506             // 2. The organization of linear 3D mipmap resource is same as GFX11, we should use mip slice size to
507             // caculate mip offset.
508             ADDR_ASSERT(firstMipInTail == pSurfInfo->numMipLevels);
509 
510             UINT_64 sliceSize = 0;
511 
512             for (INT_32 i = static_cast<INT_32>(pSurfInfo->numMipLevels) - 1; i >= 0; i--)
513             {
514                 pOut->pMipInfo[i].offset           = sliceSize;
515                 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
516                 pOut->pMipInfo[i].mipTailOffset    = 0;
517 
518                 sliceSize += mipSliceSize[i];
519             }
520         }
521         else
522         {
523             UINT_64 offset         = 0;
524             UINT_64 macroBlkOffset = 0;
525 
526             // Even though "firstMipInTail" is zero-based while "numMipLevels" is one-based, from definition of
527             // _ADDR3_COMPUTE_SURFACE_INFO_OUTPUT struct,
528             // UINT_32             firstMipIdInTail;     ///< The id of first mip in tail, if there is no mip
529             //                                           ///  in tail, it will be set to number of mip levels
530             // See initialization:
531             //              UINT_32       firstMipInTail    = pIn->numMipLevels
532             // It is possible that they are equal if
533             //      1. a single mip level image that's larger than the largest mip that would fit in the mip tail if
534             //         the mip tail existed
535             //      2. 256B_2D and linear images which don't have miptails from HWAL functionality
536             //
537             // We can use firstMipInTail != pIn->numMipLevels to check it has mip in tails and do mipInfo assignment.
538             if (firstMipInTail != pSurfInfo->numMipLevels)
539             {
540                 // Determine the application dimensions of the first mip level that resides in the tail.
541                 // This is distinct from "tailMaxDim" which is the maximum size of a mip level that will fit in the
542                 // tail.
543                 ADDR_EXTENT3D mipExtentFirstInTail = GetMipExtent(mip0Dims, firstMipInTail);
544 
545                 // For a 2D image, "alignedDepth" is always "1".
546                 // For a 3D image, this is effectively the number of application slices associated with the first mip
547                 //                 in the tail (up-aligned to HW requirements).
548                 const UINT_32 alignedDepth = PowTwoAlign(mipExtentFirstInTail.depth, pOut->blockExtent.depth);
549 
550                 // "hwSlices" is the number of HW blocks required to represent the first mip level in the tail.
551                 const UINT_32 hwSlices = alignedDepth / pOut->blockExtent.depth;
552 
553                 // Note that for 3D images that utilize a 2D swizzle mode, there really can be multiple
554                 // HW slices that encompass the mip tail; i.e., hwSlices is not necessarily one.
555                 // For example, you could have a single mip level 8x8x32 image with a 4KB_2D swizzle mode
556                 // The 8x8 region fits into a 4KB block (so it's "in the tail"), but because we have a 2D
557                 // swizzle mode (where each slice is its own block, so blockExtent.depth == 1), hwSlices
558                 // will now be equivalent to the number of application slices, or 32.
559 
560                 // Mip tails are stored in "reverse" order -- i.e., the mip-tail itself is stored first, so the
561                 // first mip level outside the tail has an offset that's the dimension of the tail itself, or one
562                 // swizzle block in size.
563                 offset         = blockSize * hwSlices;
564                 macroBlkOffset = blockSize;
565 
566                 // And determine the per-mip information for everything inside the mip tail.
567                 GetMipOrigin(pIn, mipExtentFirstInTail, pOut);
568             }
569 
570             // Again, because mip-levels are stored backwards (smallest first), we start determining mip-level
571             // offsets from the smallest to the largest.
572             // Note that firstMipInTail == 0 immediately terminates the loop, so there is no need to check for this
573             // case.
574             for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
575             {
576                 pOut->pMipInfo[i].offset           = offset;
577                 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
578                 pOut->pMipInfo[i].mipTailOffset    = 0;
579 
580                 offset         += mipSize[i];
581                 macroBlkOffset += mipSliceSize[i];
582             }
583         }
584     }
585 }
586 
587 /**
588 ************************************************************************************************************************
589 *   Gfx12Lib::HwlComputeSurfaceInfo
590 *
591 *   @brief
592 *       Internal function to calculate alignment for a surface
593 *
594 *   @return
595 *       VOID
596 ************************************************************************************************************************
597 */
HwlComputeSurfaceInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfo,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const598 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
599      const ADDR3_COMPUTE_SURFACE_INFO_INPUT*  pSurfInfo,  ///< [in] input structure
600      ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut        ///< [out] output structure
601      ) const
602 {
603     ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ pSurfInfo };
604 
605     // Check that only 2D swizzle mode supports MSAA
606     const UINT_32 samplesLog2 = Is2dSwizzle(pSurfInfo->swizzleMode) ? Log2(pSurfInfo->numSamples) : 0;
607 
608     // The block dimension width/height/depth is determined only by swizzle mode, MSAA samples and bpp
609     pOut->blockExtent = GetBlockDimensionTableEntry(pSurfInfo->swizzleMode, samplesLog2, Log2(pSurfInfo->bpp >> 3));
610 
611     ADDR_E_RETURNCODE  returnCode = ApplyCustomizedPitchHeight(pSurfInfo, pOut);
612 
613     if (returnCode == ADDR_OK)
614     {
615         pOut->numSlices = PowTwoAlign(pSurfInfo->numSlices, pOut->blockExtent.depth);
616         pOut->baseAlign = 1 << GetBlockSizeLog2(pSurfInfo->swizzleMode);
617 
618         GetMipOffset(&input, pOut);
619 
620         SanityCheckSurfSize(&input, pOut);
621 
622         // Slices must be exact multiples of the block sizes.  However:
623         // - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
624         //
625         // Note that with linear images that have only one slice, we can always guarantee pOut->sliceSize is 256B
626         // alignment so there is no need to worry about it.
627         ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) % GetBlockSize(pSurfInfo->swizzleMode)) == 0);
628     }
629 
630     return returnCode;
631 }
632 
633 /**
634 ************************************************************************************************************************
635 *   Gfx12Lib::GetBaseMipExtents
636 *
637 *   @brief
638 *       Return the size of the base mip level in a nice cozy little structure.
639 *
640 ************************************************************************************************************************
641 */
GetBaseMipExtents(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const642 ADDR_EXTENT3D Gfx12Lib::GetBaseMipExtents(
643     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn
644     ) const
645 {
646     return { pIn->width,
647              pIn->height,
648              (IsTex3d(pIn->resourceType) ? pIn->numSlices : 1) }; // slices is depth for 3d
649 }
650 
651 /**
652 ************************************************************************************************************************
653 *   Gfx12Lib::GetMaxNumMipsInTail
654 *
655 *   @brief
656 *       Return max number of mips in tails
657 *
658 *   @return
659 *       Max number of mips in tails
660 ************************************************************************************************************************
661 */
GetMaxNumMipsInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const662 UINT_32 Gfx12Lib::GetMaxNumMipsInTail(
663     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
664     ) const
665 {
666     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
667     const UINT_32  blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
668 
669     UINT_32 effectiveLog2 = blockSizeLog2;
670     UINT_32 mipsInTail    = 1;
671 
672     if (Is3dSwizzle(pSurfInfo->swizzleMode))
673     {
674         effectiveLog2 -= (blockSizeLog2 - 8) / 3;
675     }
676 
677     if (effectiveLog2 > 8)
678     {
679         mipsInTail = (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
680     }
681 
682     return mipsInTail;
683 }
684 
685 /**
686 ************************************************************************************************************************
687 *   Gfx12Lib::HwlCalcMipInTail
688 *
689 *   @brief
690 *       Internal function to calculate the "mipInTail" parameter.
691 *
692 *   @return
693 *       The magic "mipInTail" parameter.
694 ************************************************************************************************************************
695 */
CalcMipInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 mipLevel) const696 INT_32 Gfx12Lib::CalcMipInTail(
697     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
698     const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,
699     UINT_32                                        mipLevel
700     ) const
701 {
702     const INT_32  firstMipIdInTail = static_cast<INT_32>(pOut->firstMipIdInTail);
703 
704     INT_32  mipInTail = 0;
705 
706     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
707     mipInTail = static_cast<INT_32>(mipLevel) - firstMipIdInTail;
708     if ((mipInTail < 0) || (pSurfInfo->numMipLevels == 1) || (GetBlockSize(pSurfInfo->swizzleMode) <= 256))
709     {
710         mipInTail = MaxMipLevels;
711     }
712 
713     return mipInTail;
714 }
715 
716 /**
717 ************************************************************************************************************************
718 *   Gfx12Lib::CalcMipOffset
719 *
720 *   @brief
721 *
722 *   @return
723 *       The magic "mipInTail" parameter.
724 ************************************************************************************************************************
725 */
CalcMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,UINT_32 mipInTail) const726 UINT_32 Gfx12Lib::CalcMipOffset(
727     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
728     UINT_32                                        mipInTail
729     ) const
730 {
731     const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
732 
733     const INT_32  signedM       = static_cast<INT_32>(maxMipsInTail) - static_cast<INT_32>(1) - mipInTail;
734     const UINT_32 m             = Max(0, signedM);
735     const UINT_32 mipOffset     = (m > 6) ? (16 << m) : (m << 8);
736 
737     return mipOffset;
738 }
739 
740 /**
741 ************************************************************************************************************************
742 *   Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear
743 *
744 *   @brief
745 *       Internal function to calculate address from coord for linear swizzle surface
746 *
747 *   @return
748 *       ADDR_E_RETURNCODE
749 ************************************************************************************************************************
750 */
HwlComputeSurfaceAddrFromCoordLinear(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfoIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const751 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear(
752     const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,         ///< [in] input structure
753     const ADDR3_COMPUTE_SURFACE_INFO_INPUT*          pSurfInfoIn, ///< [in] input structure
754     ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut         ///< [out] output structure
755     ) const
756 {
757     ADDR3_MIP_INFO mipInfo[MaxMipLevels];
758     ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
759 
760     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surfInfoOut = {0};
761     surfInfoOut.size     = sizeof(surfInfoOut);
762     surfInfoOut.pMipInfo = mipInfo;
763 
764     ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfo(pSurfInfoIn, &surfInfoOut);
765 
766     if (returnCode == ADDR_OK)
767     {
768         pOut->addr        = (surfInfoOut.sliceSize * pIn->slice) +
769                             mipInfo[pIn->mipId].offset +
770                             (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
771 
772         pOut->bitPosition = 0;
773     }
774 
775     return returnCode;
776 }
777 
778 /**
779 ************************************************************************************************************************
780 *   Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled
781 *
782 *   @brief
783 *       Internal function to calculate address from coord for tiled swizzle surface
784 *
785 *   @return
786 *       ADDR_E_RETURNCODE
787 ************************************************************************************************************************
788 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const789 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
790      const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
791      ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
792      ) const
793 {
794     // 256B block cannot support 3D image.
795     ADDR_ASSERT((IsTex3d(pIn->resourceType) && IsBlock256b(pIn->swizzleMode)) == FALSE);
796 
797     ADDR3_COMPUTE_SURFACE_INFO_INPUT  localIn               = {};
798     ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut              = {};
799     ADDR3_MIP_INFO                    mipInfo[MaxMipLevels] = {};
800 
801     localIn.size         = sizeof(localIn);
802     localIn.flags        = pIn->flags;
803     localIn.swizzleMode  = pIn->swizzleMode;
804     localIn.resourceType = pIn->resourceType;
805     localIn.format       = ADDR_FMT_INVALID;
806     localIn.bpp          = pIn->bpp;
807     localIn.width        = Max(pIn->unAlignedDims.width, 1u);
808     localIn.height       = Max(pIn->unAlignedDims.height, 1u);
809     localIn.numSlices    = Max(pIn->unAlignedDims.depth, 1u);
810     localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
811     localIn.numSamples   = Max(pIn->numSamples, 1u);
812 
813     localOut.size        = sizeof(localOut);
814     localOut.pMipInfo    = mipInfo;
815     ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &localIn };
816 
817     ADDR_E_RETURNCODE ret = ComputeSurfaceInfo(&localIn, &localOut);
818 
819     if (ret == ADDR_OK)
820     {
821         const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
822         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
823 
824         // Addr3 equation table excludes linear swizzle mode, and fortunately HwlComputeSurfaceAddrFromCoordTiled() is
825         // only called for non-linear swizzle mode.
826         const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, Log2(localIn.numSamples), elemLog2);
827 
828         if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
829         {
830             ADDR3_COORD  coords = {};
831 
832             // For a 3D image, one swizzle block contains multiple application slices.
833             // For any given image, each HW slice is addressed identically to any other HW slice.
834             // hwSliceSizeBytes is the size of one HW slice; i.e., the number of bytes for the pattern to repeat.
835             // hwSliceId is the index (0, 1, 2...) of the HW slice that an application slice resides in.
836             const UINT_64 hwSliceSizeBytes = localOut.sliceSize * localOut.blockExtent.depth;
837             const UINT_32 hwSliceId = pIn->slice / localOut.blockExtent.depth;
838 
839             const UINT_32 pb     = mipInfo[pIn->mipId].pitch / localOut.blockExtent.width;
840             const UINT_32 yb     = pIn->y / localOut.blockExtent.height;
841             const UINT_32 xb     = pIn->x / localOut.blockExtent.width;
842             const UINT_64 blkIdx = yb * pb + xb;
843 
844             // Technically, the addition of "mipTailCoordX" is only necessary if we're in the mip-tail.
845             // The "mipTailCoordXYZ" values should be zero if we're not in the mip-tail.
846             const BOOL_32 inTail = ((mipInfo[pIn->mipId].mipTailOffset != 0) && (blkSizeLog2 != Log2Size256));
847 
848             ADDR_ASSERT((inTail == TRUE) ||
849                         // If we're not in the tail, then all of these must be zero.
850                         ((mipInfo[pIn->mipId].mipTailCoordX == 0) &&
851                          (mipInfo[pIn->mipId].mipTailCoordY == 0) &&
852                          (mipInfo[pIn->mipId].mipTailCoordZ == 0)));
853 
854             coords.x = pIn->x     + mipInfo[pIn->mipId].mipTailCoordX;
855             coords.y = pIn->y     + mipInfo[pIn->mipId].mipTailCoordY;
856             coords.z = pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ;
857 
858             // Note that in this path, blkIdx does not account for the HW slice ID, so we need to
859             // add it in here.
860             pOut->addr = hwSliceSizeBytes * hwSliceId;
861 
862             const UINT_32 blkOffset  = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
863                                                                  coords.x << elemLog2,
864                                                                  coords.y,
865                                                                  coords.z,
866                                                                  pIn->sample);
867 
868             pOut->addr += mipInfo[pIn->mipId].macroBlockOffset +
869                           (blkIdx << blkSizeLog2)              +
870                           blkOffset;
871 
872             ADDR_ASSERT(pOut->addr < localOut.surfSize);
873         }
874         else
875         {
876             ret = ADDR_INVALIDPARAMS;
877         }
878     }
879 
880     return ret;
881 }
882 
883 /**
884 ************************************************************************************************************************
885 *   Gfx12Lib::HwlComputePipeBankXor
886 *
887 *   @brief
888 *       Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address
889 *
890 *   @return
891 *       PipeBankXor value
892 ************************************************************************************************************************
893 */
HwlComputePipeBankXor(const ADDR3_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const894 ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor(
895     const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
896     ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
897     ) const
898 {
899     if ((m_numSwizzleBits != 0)               && // does this configuration support swizzling
900         //         base address XOR in GFX12 will be applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes,
901         //         Note that Linear and 256B are excluded.
902         (IsLinear(pIn->swizzleMode) == FALSE) &&
903         (IsBlock256b(pIn->swizzleMode) == FALSE))
904     {
905         pOut->pipeBankXor = pIn->surfIndex % (1 << m_numSwizzleBits);
906     }
907     else
908     {
909         pOut->pipeBankXor = 0;
910     }
911 
912     return ADDR_OK;
913 }
914 
915 /**
916 ************************************************************************************************************************
917 *   Gfx12Lib::ComputeOffsetFromEquation
918 *
919 *   @brief
920 *       Compute offset from equation
921 *
922 *   @return
923 *       Offset
924 ************************************************************************************************************************
925 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const926 UINT_32 Gfx12Lib::ComputeOffsetFromEquation(
927     const ADDR_EQUATION* pEq,   ///< Equation
928     UINT_32              x,     ///< x coord in bytes
929     UINT_32              y,     ///< y coord in pixel
930     UINT_32              z,     ///< z coord in slice
931     UINT_32              s      ///< MSAA sample index
932     ) const
933 {
934     UINT_32 offset = 0;
935 
936     for (UINT_32 i = 0; i < pEq->numBits; i++)
937     {
938         UINT_32 v = 0;
939 
940         if (pEq->addr[i].valid)
941         {
942             if (pEq->addr[i].channel == 0)
943             {
944                 v ^= (x >> pEq->addr[i].index) & 1;
945             }
946             else if (pEq->addr[i].channel == 1)
947             {
948                 v ^= (y >> pEq->addr[i].index) & 1;
949             }
950             else if (pEq->addr[i].channel == 2)
951             {
952                 v ^= (z >> pEq->addr[i].index) & 1;
953             }
954             else if (pEq->addr[i].channel == 3)
955             {
956                 v ^= (s >> pEq->addr[i].index) & 1;
957             }
958             else
959             {
960                 ADDR_ASSERT_ALWAYS();
961             }
962         }
963 
964         offset |= (v << i);
965     }
966 
967     return offset;
968 }
969 
970 /**
971 ************************************************************************************************************************
972 *   Gfx12Lib::GetSwizzlePatternInfo
973 *
974 *   @brief
975 *       Get swizzle pattern
976 *
977 *   @return
978 *       Swizzle pattern information
979 ************************************************************************************************************************
980 */
GetSwizzlePatternInfo(Addr3SwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numFrag) const981 const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo(
982     Addr3SwizzleMode swizzleMode,       ///< Swizzle mode
983     UINT_32          elemLog2,          ///< Element size in bytes log2
984     UINT_32          numFrag            ///< Number of fragment
985     ) const
986 {
987     const ADDR_SW_PATINFO* patInfo = NULL;
988 
989     if (Is2dSwizzle(swizzleMode) == FALSE)
990     {
991         ADDR_ASSERT(numFrag == 1);
992     }
993 
994     switch (swizzleMode)
995     {
996     case ADDR3_256KB_2D:
997         switch (numFrag)
998         {
999         case 1:
1000             patInfo = GFX12_SW_256KB_2D_1xAA_PATINFO;
1001             break;
1002         case 2:
1003             patInfo = GFX12_SW_256KB_2D_2xAA_PATINFO;
1004             break;
1005         case 4:
1006             patInfo = GFX12_SW_256KB_2D_4xAA_PATINFO;
1007             break;
1008         case 8:
1009             patInfo = GFX12_SW_256KB_2D_8xAA_PATINFO;
1010             break;
1011         default:
1012             ADDR_ASSERT_ALWAYS();
1013         }
1014         break;
1015     case ADDR3_256KB_3D:
1016         patInfo = GFX12_SW_256KB_3D_PATINFO;
1017         break;
1018     case ADDR3_64KB_2D:
1019         switch (numFrag)
1020         {
1021         case 1:
1022             patInfo = GFX12_SW_64KB_2D_1xAA_PATINFO;
1023             break;
1024         case 2:
1025             patInfo = GFX12_SW_64KB_2D_2xAA_PATINFO;
1026             break;
1027         case 4:
1028             patInfo = GFX12_SW_64KB_2D_4xAA_PATINFO;
1029             break;
1030         case 8:
1031             patInfo = GFX12_SW_64KB_2D_8xAA_PATINFO;
1032             break;
1033         default:
1034             ADDR_ASSERT_ALWAYS();
1035         }
1036         break;
1037     case ADDR3_64KB_3D:
1038         patInfo = GFX12_SW_64KB_3D_PATINFO;
1039         break;
1040     case ADDR3_4KB_2D:
1041         switch (numFrag)
1042         {
1043         case 1:
1044             patInfo = GFX12_SW_4KB_2D_1xAA_PATINFO;
1045             break;
1046         case 2:
1047             patInfo = GFX12_SW_4KB_2D_2xAA_PATINFO;
1048             break;
1049         case 4:
1050             patInfo = GFX12_SW_4KB_2D_4xAA_PATINFO;
1051             break;
1052         case 8:
1053             patInfo = GFX12_SW_4KB_2D_8xAA_PATINFO;
1054             break;
1055         default:
1056             ADDR_ASSERT_ALWAYS();
1057         }
1058         break;
1059     case ADDR3_4KB_3D:
1060         patInfo = GFX12_SW_4KB_3D_PATINFO;
1061         break;
1062     case ADDR3_256B_2D:
1063         switch (numFrag)
1064         {
1065         case 1:
1066             patInfo = GFX12_SW_256B_2D_1xAA_PATINFO;
1067             break;
1068         case 2:
1069             patInfo = GFX12_SW_256B_2D_2xAA_PATINFO;
1070             break;
1071         case 4:
1072             patInfo = GFX12_SW_256B_2D_4xAA_PATINFO;
1073             break;
1074         case 8:
1075             patInfo = GFX12_SW_256B_2D_8xAA_PATINFO;
1076             break;
1077         default:
1078             break;
1079         }
1080         break;
1081     default:
1082         ADDR_ASSERT_ALWAYS();
1083         break;
1084     }
1085 
1086     return (patInfo != NULL) ? &patInfo[elemLog2] : NULL;
1087 }
1088 /**
1089 ************************************************************************************************************************
1090 *   Gfx12Lib::HwlInitGlobalParams
1091 *
1092 *   @brief
1093 *       Initializes global parameters
1094 *
1095 *   @return
1096 *       TRUE if all settings are valid
1097 *
1098 ************************************************************************************************************************
1099 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1100 BOOL_32 Gfx12Lib::HwlInitGlobalParams(
1101     const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1102 {
1103     BOOL_32              valid = TRUE;
1104     GB_ADDR_CONFIG_GFX12 gbAddrConfig;
1105 
1106     gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1107 
1108     switch (gbAddrConfig.bits.NUM_PIPES)
1109     {
1110         case ADDR_CONFIG_1_PIPE:
1111             m_pipesLog2 = 0;
1112             break;
1113         case ADDR_CONFIG_2_PIPE:
1114             m_pipesLog2 = 1;
1115             break;
1116         case ADDR_CONFIG_4_PIPE:
1117             m_pipesLog2 = 2;
1118             break;
1119         case ADDR_CONFIG_8_PIPE:
1120             m_pipesLog2 = 3;
1121             break;
1122         case ADDR_CONFIG_16_PIPE:
1123             m_pipesLog2 = 4;
1124             break;
1125         case ADDR_CONFIG_32_PIPE:
1126             m_pipesLog2 = 5;
1127             break;
1128         case ADDR_CONFIG_64_PIPE:
1129             m_pipesLog2 = 6;
1130             break;
1131         default:
1132             ADDR_ASSERT_ALWAYS();
1133             valid = FALSE;
1134             break;
1135     }
1136 
1137     switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1138     {
1139         case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1140             m_pipeInterleaveLog2 = 8;
1141             break;
1142         case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1143             m_pipeInterleaveLog2 = 9;
1144             break;
1145         case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1146             m_pipeInterleaveLog2 = 10;
1147             break;
1148         case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1149             m_pipeInterleaveLog2 = 11;
1150             break;
1151         default:
1152             ADDR_ASSERT_ALWAYS();
1153             valid = FALSE;
1154             break;
1155     }
1156 
1157     m_numSwizzleBits = ((m_pipesLog2 >= 3) ? m_pipesLog2 - 2 : 0);
1158 
1159     if (valid)
1160     {
1161         InitEquationTable();
1162         InitBlockDimensionTable();
1163     }
1164 
1165     return valid;
1166 }
1167 
1168 /**
1169 ************************************************************************************************************************
1170 *   Gfx12Lib::HwlComputeNonBlockCompressedView
1171 *
1172 *   @brief
1173 *       Compute non-block-compressed view for a given mipmap level/slice.
1174 *
1175 *   @return
1176 *       ADDR_E_RETURNCODE
1177 ************************************************************************************************************************
1178 */
HwlComputeNonBlockCompressedView(const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1179 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeNonBlockCompressedView(
1180     const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
1181     ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
1182     ) const
1183 {
1184     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1185 
1186     if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1187         ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1188     {
1189         // Only support BC1~BC7, ASTC, or ETC2 for now...
1190         returnCode = ADDR_NOTSUPPORTED;
1191     }
1192     else
1193     {
1194         UINT_32 bcWidth, bcHeight;
1195         const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1196 
1197         ADDR3_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1198         infoIn.size         = sizeof(infoIn);
1199         infoIn.flags        = pIn->flags;
1200         infoIn.swizzleMode  = pIn->swizzleMode;
1201         infoIn.resourceType = pIn->resourceType;
1202         infoIn.format       = pIn->format;
1203         infoIn.bpp          = bpp;
1204         infoIn.width        = RoundUpQuotient(pIn->unAlignedDims.width, bcWidth);
1205         infoIn.height       = RoundUpQuotient(pIn->unAlignedDims.height, bcHeight);
1206         infoIn.numSlices    = pIn->unAlignedDims.depth;
1207         infoIn.numMipLevels = pIn->numMipLevels;
1208         infoIn.numSamples   = 1;
1209 
1210         ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};
1211 
1212         ADDR3_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1213         infoOut.size     = sizeof(infoOut);
1214         infoOut.pMipInfo = mipInfo;
1215 
1216         returnCode = HwlComputeSurfaceInfo(&infoIn, &infoOut);
1217 
1218         if (returnCode == ADDR_OK)
1219         {
1220             ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1221             subOffIn.size             = sizeof(subOffIn);
1222             subOffIn.swizzleMode      = infoIn.swizzleMode;
1223             subOffIn.resourceType     = infoIn.resourceType;
1224             subOffIn.pipeBankXor      = pIn->pipeBankXor;
1225             subOffIn.slice            = pIn->slice;
1226             subOffIn.sliceSize        = infoOut.sliceSize;
1227             subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1228             subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;
1229 
1230             ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1231             subOffOut.size = sizeof(subOffOut);
1232 
1233             // For any mipmap level, move nonBc view base address by offset
1234             HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1235             pOut->offset = subOffOut.offset;
1236 
1237             ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1238             slicePbXorIn.size            = sizeof(slicePbXorIn);
1239             slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
1240             slicePbXorIn.resourceType    = infoIn.resourceType;
1241             slicePbXorIn.bpe             = infoIn.bpp;
1242             slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
1243             slicePbXorIn.slice           = pIn->slice;
1244             slicePbXorIn.numSamples      = 1;
1245 
1246             ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
1247             slicePbXorOut.size = sizeof(slicePbXorOut);
1248 
1249             // For any mipmap level, nonBc view should use computed pbXor
1250             HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
1251             pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
1252 
1253             const BOOL_32 tiled            = (pIn->swizzleMode != ADDR3_LINEAR);
1254             const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail);
1255             const UINT_32 requestMipWidth  =
1256                     RoundUpQuotient(Max(pIn->unAlignedDims.width  >> pIn->mipId, 1u), bcWidth);
1257             const UINT_32 requestMipHeight =
1258                     RoundUpQuotient(Max(pIn->unAlignedDims.height >> pIn->mipId, 1u), bcHeight);
1259 
1260             if (inTail)
1261             {
1262                 // For mipmap level that is in mip tail block, hack a lot of things...
1263                 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
1264                 // are fit in tail block:
1265 
1266                 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
1267                 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
1268 
1269                 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
1270                 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
1271 
1272                 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
1273                 pOut->unAlignedDims.width  = Min(requestMipWidth << pOut->mipId, infoOut.blockExtent.width / 2);
1274 
1275                 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
1276                 pOut->unAlignedDims.height = Min(requestMipHeight << pOut->mipId, infoOut.blockExtent.height);
1277             }
1278             // This check should cover at least mipId == 0
1279             else if ((requestMipWidth << pIn->mipId) == infoIn.width)
1280             {
1281                 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
1282                 // - only one mipmap level and mipId = 0
1283                 pOut->mipId        = 0;
1284                 pOut->numMipLevels = 1;
1285 
1286                 // (mip0) width = requestMipWidth
1287                 pOut->unAlignedDims.width  = requestMipWidth;
1288 
1289                 // (mip0) height = requestMipHeight
1290                 pOut->unAlignedDims.height = requestMipHeight;
1291             }
1292             else
1293             {
1294                 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
1295                 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
1296                 // because single mip view may have different pitch value than original (multiple) mip view...
1297                 // A simple case would be:
1298                 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
1299                 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
1300                 //   mip0 width = 0x101/mip1 width = 0x80
1301                 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
1302                 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
1303 
1304                 // - 2 levels and mipId = 1
1305                 pOut->mipId        = 1;
1306                 pOut->numMipLevels = 2;
1307 
1308                 const UINT_32 upperMipWidth  =
1309                     RoundUpQuotient(Max(pIn->unAlignedDims.width  >> (pIn->mipId - 1), 1u), bcWidth);
1310                 const UINT_32 upperMipHeight =
1311                     RoundUpQuotient(Max(pIn->unAlignedDims.height >> (pIn->mipId - 1), 1u), bcHeight);
1312 
1313                 const BOOL_32 needToAvoidInTail = tiled                                              &&
1314                                                   (requestMipWidth <= infoOut.blockExtent.width / 2) &&
1315                                                   (requestMipHeight <= infoOut.blockExtent.height);
1316 
1317                 const UINT_32 hwMipWidth  =
1318                     PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockExtent.width);
1319                 const UINT_32 hwMipHeight =
1320                     PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockExtent.height);
1321 
1322                 const BOOL_32 needExtraWidth =
1323                     ((upperMipWidth < requestMipWidth * 2) ||
1324                      ((upperMipWidth == requestMipWidth * 2) &&
1325                       ((needToAvoidInTail == TRUE) ||
1326                        (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockExtent.width)))));
1327 
1328                 const BOOL_32 needExtraHeight =
1329                     ((upperMipHeight < requestMipHeight * 2) ||
1330                      ((upperMipHeight == requestMipHeight * 2) &&
1331                       ((needToAvoidInTail == TRUE) ||
1332                        (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockExtent.height)))));
1333 
1334                 // (mip0) width = requestLastMipLevelWidth
1335                 pOut->unAlignedDims.width  = upperMipWidth + (needExtraWidth ? 1: 0);
1336 
1337                 // (mip0) height = requestLastMipLevelHeight
1338                 pOut->unAlignedDims.height = upperMipHeight + (needExtraHeight ? 1: 0);
1339             }
1340 
1341             // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
1342             ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.width, pOut->mipId)  == requestMipWidth);
1343             // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
1344             ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.height, pOut->mipId) == requestMipHeight);
1345         }
1346     }
1347 
1348     return returnCode;
1349 }
1350 
1351 /**
1352 ************************************************************************************************************************
1353 *   Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1354 *
1355 *   @brief
1356 *       Compute sub resource offset to support swizzle pattern
1357 *
1358 *   @return
1359 *       VOID
1360 ************************************************************************************************************************
1361 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1362 VOID Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1363     const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
1364     ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
1365     ) const
1366 {
1367     pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1368 }
1369 
1370 /**
1371 ************************************************************************************************************************
1372 *   Gfx12Lib::HwlComputeSlicePipeBankXor
1373 *
1374 *   @brief
1375 *       Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1376 *
1377 *   @return
1378 *       PipeBankXor value
1379 ************************************************************************************************************************
1380 */
HwlComputeSlicePipeBankXor(const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1381 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor(
1382     const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
1383     ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
1384     ) const
1385 {
1386     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1387 
1388     // PipeBankXor is only applied to 4KB, 64KB and 256KB on GFX12.
1389     if ((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE))
1390     {
1391         if (pIn->bpe == 0)
1392         {
1393             // Require a valid bytes-per-element value passed from client...
1394             returnCode = ADDR_INVALIDPARAMS;
1395         }
1396         else
1397         {
1398             const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1399                                                                     Log2(pIn->bpe >> 3),
1400                                                                     1);
1401 
1402             if (pPatInfo != NULL)
1403             {
1404                 const UINT_32 elemLog2    = Log2(pIn->bpe >> 3);
1405 
1406                 // Addr3 equation table excludes linear swizzle mode, and fortunately when calling
1407                 // HwlComputeSlicePipeBankXor the swizzle mode is non-linear, so we don't need to worry about negative
1408                 // table index.
1409                 const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), elemLog2);
1410 
1411                 const UINT_32 pipeBankXorOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
1412                                                                             0,
1413                                                                             0,
1414                                                                             pIn->slice,
1415                                                                             0);
1416 
1417                 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1418 
1419                 // Should have no bit set under pipe interleave
1420                 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1421 
1422                 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1423             }
1424             else
1425             {
1426                 // Should never come here...
1427                 ADDR_NOT_IMPLEMENTED();
1428 
1429                 returnCode = ADDR_NOTSUPPORTED;
1430             }
1431         }
1432     }
1433     else
1434     {
1435         pOut->pipeBankXor = 0;
1436     }
1437 
1438     return returnCode;
1439 }
1440 
1441 /**
1442 ************************************************************************************************************************
1443 *   Gfx12Lib::HwlConvertChipFamily
1444 *
1445 *   @brief
1446 *       Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1447 *   @return
1448 *       ChipFamily
1449 ************************************************************************************************************************
1450 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1451 ChipFamily Gfx12Lib::HwlConvertChipFamily(
1452     UINT_32 chipFamily,        ///< [in] chip family defined in atiih.h
1453     UINT_32 chipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
1454 {
1455     return ADDR_CHIP_FAMILY_NAVI;
1456 }
1457 
1458 /**
1459 ************************************************************************************************************************
1460 *   Gfx12Lib::SanityCheckSurfSize
1461 *
1462 *   @brief
1463 *       Calculate the surface size via the exact hardware algorithm to see if it matches.
1464 *
1465 *   @return
1466 ************************************************************************************************************************
1467 */
SanityCheckSurfSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1468 void Gfx12Lib::SanityCheckSurfSize(
1469     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1470     const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT*       pOut
1471     ) const
1472 {
1473 #if DEBUG
1474     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1475     // Verify that the requested image size is valid for the below algorithm.  The below code includes
1476     // implicit assumptions about the surface dimensions being less than "MaxImageDim"; otherwise, it can't
1477     // calculate "firstMipInTail" accurately and the below assertion will trip incorrectly.
1478     //
1479     // Surfaces destined for use only on the SDMA engine can exceed the gfx-engine-imposed limitations of
1480     // the "maximum" image dimensions.
1481     if ((pSurfInfo->width <= MaxImageDim)         &&
1482         (pSurfInfo->height <= MaxImageDim)        &&
1483         (pSurfInfo->numMipLevels <= MaxMipLevels) &&
1484         (UseCustomPitch(pSurfInfo) == FALSE)      &&
1485         (UseCustomHeight(pSurfInfo) == FALSE)     &&
1486         // HiZS surfaces have a reduced image size (i.e,. each pixel represents an 8x8 region of the parent
1487         // image, at least for single samples) but they still have the same number of mip levels as the
1488         // parent image.  This disconnect produces false assertions below as the image size doesn't apparently
1489         // support the specified number of mip levels.
1490         ((pSurfInfo->flags.hiZHiS == 0) || (pSurfInfo->numMipLevels == 1)))
1491     {
1492         UINT_32  lastMipSize = 1;
1493         UINT_64  dataChainSize = 0;
1494 
1495         const ADDR_EXTENT3D  mip0Dims      = GetBaseMipExtents(pSurfInfo);
1496         const UINT_32        blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1497         const ADDR_EXTENT3D  tailMaxDim    = GetMipTailDim(pIn, pOut->blockExtent);
1498         const UINT_32        maxMipsInTail = GetMaxNumMipsInTail(pIn);
1499 
1500         UINT_32  firstMipInTail = 0;
1501         for (INT_32 mipIdx = MaxMipLevels - 1; mipIdx >= 0; mipIdx--)
1502         {
1503             const ADDR_EXTENT3D  mipExtents = GetMipExtent(mip0Dims, mipIdx);
1504 
1505             if (IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
1506             {
1507                 firstMipInTail = mipIdx;
1508             }
1509         }
1510 
1511         for (INT_32 mipIdx = firstMipInTail - 1; mipIdx >= -1; mipIdx--)
1512         {
1513             if (mipIdx < (static_cast<INT_32>(pSurfInfo->numMipLevels) - 1))
1514             {
1515                 dataChainSize += lastMipSize;
1516             }
1517 
1518             if (mipIdx >= 0)
1519             {
1520                 const ADDR_EXTENT3D  mipExtents     = GetMipExtent(mip0Dims, mipIdx);
1521                 const UINT_32        mipBlockWidth  = ShiftCeil(mipExtents.width, Log2(pOut->blockExtent.width));
1522                 const UINT_32        mipBlockHeight = ShiftCeil(mipExtents.height, Log2(pOut->blockExtent.height));
1523 
1524                 lastMipSize = 4 * lastMipSize
1525                     - ((mipBlockWidth & 1) ? mipBlockHeight : 0)
1526                     - ((mipBlockHeight & 1) ? mipBlockWidth : 0)
1527                     - ((mipBlockWidth & mipBlockHeight & 1) ? 1 : 0);
1528             }
1529         }
1530 
1531         if (CanTrimLinearPadding(pSurfInfo))
1532         {
1533             ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) <= (dataChainSize << blockSizeLog2));
1534         }
1535         else
1536         {
1537             ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) == (dataChainSize << blockSizeLog2));
1538         }
1539     }
1540 #endif
1541 }
1542 
1543 /**
1544 ************************************************************************************************************************
1545 *   Gfx12Lib::HwlGetMicroBlockSize
1546 *
1547 *   @brief
1548 *       Determines the dimensions of a 256B microblock
1549 *
1550 *   @return
1551 ************************************************************************************************************************
1552 */
HwlGetMicroBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const1553 ADDR_EXTENT3D Gfx12Lib::HwlGetMicroBlockSize(
1554     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
1555     ) const
1556 {
1557     ADDR_EXTENT3D out = {};
1558     INT_32 widthLog2  = 0;
1559     INT_32 heightLog2 = 0;
1560     INT_32 depthLog2  = 0;
1561     Addr3SwizzleMode swMode    = pIn->pSurfInfo->swizzleMode;
1562     UINT_32          bppLog2   = Log2(pIn->pSurfInfo->bpp >> 3);
1563     UINT_32          blockBits = 8 - bppLog2;
1564     if (IsLinear(swMode))
1565     {
1566         widthLog2 = blockBits;
1567     }
1568     else if (Is2dSwizzle(swMode))
1569     {
1570         widthLog2  = (blockBits >> 1) + (blockBits & 1);
1571         heightLog2 = (blockBits >> 1);
1572     }
1573     else
1574     {
1575         ADDR_ASSERT(Is3dSwizzle(swMode));
1576         depthLog2  = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1577         widthLog2  = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1578         heightLog2 = (blockBits / 3);
1579     }
1580     out.width  = 1 << widthLog2;
1581     out.height = 1 << heightLog2;
1582     out.depth  = 1 << depthLog2;
1583     return out;
1584 }
1585 
1586 /**
1587 ************************************************************************************************************************
1588 *   Gfx12Lib::HwlCalcBlockSize
1589 *
1590 *   @brief
1591 *       Determines the extent, in pixels of a swizzle block.
1592 *
1593 *   @return
1594 ************************************************************************************************************************
1595 */
HwlCalcBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR_EXTENT3D * pExtent) const1596 VOID Gfx12Lib::HwlCalcBlockSize(
1597     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1598     ADDR_EXTENT3D*                                 pExtent
1599     ) const
1600 {
1601     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1602     const UINT_32                           log2BlkSize = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1603     const UINT_32 eleBytes     = pSurfInfo->bpp >> 3;
1604     const UINT_32 log2EleBytes = Log2(eleBytes);
1605 
1606     if (IsLinear(pSurfInfo->swizzleMode))
1607     {
1608         // 1D swizzle mode doesn't support MSAA, so there is no need to consider log2(samples)
1609         pExtent->width  = 1 << (log2BlkSize - log2EleBytes);
1610         pExtent->height = 1;
1611         pExtent->depth  = 1;
1612     }
1613     else if (Is3dSwizzle(pSurfInfo->swizzleMode))
1614     {
1615         // 3D swizlze mode doesn't support MSAA, so there is no need to consider log2(samples)
1616         const UINT_32 base             = (log2BlkSize / 3) - (log2EleBytes / 3);
1617         const UINT_32 log2BlkSizeMod3  = log2BlkSize % 3;
1618         const UINT_32 log2EleBytesMod3 = log2EleBytes % 3;
1619 
1620         UINT_32  x = base;
1621         UINT_32  y = base;
1622         UINT_32  z = base;
1623 
1624         if (log2BlkSizeMod3 > 0)
1625         {
1626             x++;
1627         }
1628 
1629         if (log2BlkSizeMod3 > 1)
1630         {
1631             z++;
1632         }
1633 
1634         if (log2EleBytesMod3 > 0)
1635         {
1636             x--;
1637         }
1638 
1639         if (log2EleBytesMod3 > 1)
1640         {
1641             z--;
1642         }
1643 
1644         pExtent->width  = 1u << x;
1645         pExtent->height = 1u << y;
1646         pExtent->depth  = 1u << z;
1647     }
1648     else
1649     {
1650         // Only 2D swizzle mode supports MSAA...
1651         // Since for gfx12 MSAA is unconditionally supported by all 2D swizzle modes, we don't need to restrict samples
1652         // to be 1 for ADDR3_256B_2D and ADDR3_4KB_2D as gfx10/11 did.
1653         const UINT_32 log2Samples = Log2(pSurfInfo->numSamples);
1654         const UINT_32 log2Width   = (log2BlkSize  >> 1)  -
1655                                     (log2EleBytes >> 1)  -
1656                                     (log2Samples  >> 1)  -
1657                                     (log2EleBytes & log2Samples & 1);
1658         const UINT_32 log2Height  = (log2BlkSize  >> 1)  -
1659                                     (log2EleBytes >> 1)  -
1660                                     (log2Samples  >> 1)  -
1661                                     ((log2EleBytes | log2Samples) & 1);
1662 
1663         // Return the extent in actual units, not log2
1664         pExtent->width  = 1u << log2Width;
1665         pExtent->height = 1u << log2Height;
1666         pExtent->depth  = 1;
1667     }
1668 }
1669 
1670 /**
1671 ************************************************************************************************************************
1672 *   Gfx12Lib::HwlGetMipInTailMaxSize
1673 *
1674 *   @brief
1675 *       Determines the max size of a mip level that fits in the mip-tail.
1676 *
1677 *   @return
1678 ************************************************************************************************************************
1679 */
HwlGetMipInTailMaxSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & blockDims) const1680 ADDR_EXTENT3D Gfx12Lib::HwlGetMipInTailMaxSize(
1681     const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1682     const ADDR_EXTENT3D&                           blockDims) const
1683 {
1684     ADDR_EXTENT3D mipTailDim = {};
1685     const Addr3SwizzleMode swizzleMode = pIn->pSurfInfo->swizzleMode;
1686     const UINT_32          log2BlkSize = GetBlockSizeLog2(swizzleMode);
1687 
1688     mipTailDim = blockDims;
1689 
1690     if (Is3dSwizzle(swizzleMode))
1691     {
1692         const UINT_32 dim = log2BlkSize % 3;
1693 
1694         if (dim == 0)
1695         {
1696             mipTailDim.height >>= 1;
1697         }
1698         else if (dim == 1)
1699         {
1700             mipTailDim.width >>= 1;
1701         }
1702         else
1703         {
1704             mipTailDim.depth >>= 1;
1705         }
1706     }
1707     else
1708     {
1709         if ((log2BlkSize % 2) == 0)
1710         {
1711             mipTailDim.width >>= 1;
1712         }
1713         else
1714         {
1715             mipTailDim.height >>= 1;
1716         }
1717     }
1718     return mipTailDim;
1719 }
1720 
1721 
1722 /**
1723 ************************************************************************************************************************
1724 *   Lib::GetPossibleSwizzleModes
1725 *
1726 *   @brief
1727 *       GFX12 specific implementation of Addr3GetPossibleSwizzleModes
1728 *
1729 *   @return
1730 *       ADDR_E_RETURNCODE
1731 ************************************************************************************************************************
1732 */
HwlGetPossibleSwizzleModes(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn,ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT * pOut) const1733 ADDR_E_RETURNCODE Gfx12Lib::HwlGetPossibleSwizzleModes(
1734      const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn,    ///< [in] input structure
1735      ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT*      pOut    ///< [out] output structure
1736      ) const
1737 {
1738     ADDR_E_RETURNCODE returnCode = ADDR_OK;
1739 
1740     const ADDR3_SURFACE_FLAGS flags = pIn->flags;
1741 
1742     if (pIn->bpp == 96)
1743     {
1744         pOut->validModes.swLinear = 1;
1745     }
1746     // Depth/Stencil images can't be linear and must be 2D swizzle modes.
1747     // These three are related to DB block that supports only SW_64KB_2D and SW_256KB_2D for DSV.
1748     else if (flags.depth || flags.stencil)
1749     {
1750         pOut->validModes.sw2d64kB  = 1;
1751         pOut->validModes.sw2d256kB = 1;
1752     }
1753     // The organization of elements in the hierarchical surface is the same as any other surface, and it can support
1754     // any 2D swizzle mode (SW_256_2D, SW_4KB_2D, SW_64KB_2D, or SW_256KB_2D).  The swizzle mode can be selected
1755     // orthogonally to the underlying z or stencil surface.
1756     else if (pIn->flags.hiZHiS)
1757     {
1758         pOut->validModes.sw2d256B  = 1;
1759         pOut->validModes.sw2d4kB   = 1;
1760         pOut->validModes.sw2d64kB  = 1;
1761         pOut->validModes.sw2d256kB = 1;
1762     }
1763     // MSAA can't be linear and must be 2D swizzle modes.
1764     else if (pIn->numSamples > 1)
1765     {
1766         pOut->validModes.sw2d256B  = 1;
1767         pOut->validModes.sw2d4kB   = 1;
1768         pOut->validModes.sw2d64kB  = 1;
1769         pOut->validModes.sw2d256kB = 1;
1770     }
1771     // Block-compressed images need to be either using 2D or linear swizzle modes.
1772     else if (flags.blockCompressed)
1773     {
1774         pOut->validModes.swLinear = 1;
1775 
1776         // We find cases where Tex3d BlockCompressed image adopts 2D_256B should be prohibited.
1777         if (IsTex3d(pIn->resourceType) == FALSE)
1778         {
1779             pOut->validModes.sw2d256B = 1;
1780         }
1781         pOut->validModes.sw2d4kB   = 1;
1782         pOut->validModes.sw2d64kB  = 1;
1783         pOut->validModes.sw2d256kB = 1;
1784     }
1785     else if (IsTex1d(pIn->resourceType))
1786     {
1787         pOut->validModes.swLinear  = 1;
1788         pOut->validModes.sw2d256B  = 1;
1789         pOut->validModes.sw2d4kB   = 1;
1790         pOut->validModes.sw2d64kB  = 1;
1791         pOut->validModes.sw2d256kB = 1;
1792     }
1793     else if (flags.nv12 || flags.p010 || IsTex2d(pIn->resourceType) || flags.view3dAs2dArray)
1794     {
1795         //      NV12 and P010 support
1796         //      SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
1797         // There could be more multimedia formats that require more hw specific tiling modes...
1798 
1799         // The exception is VRS images.
1800         // Linear is not allowed for VRS images.
1801         if (flags.isVrsImage == 0)
1802         {
1803             pOut->validModes.swLinear = 1;
1804         }
1805         if (flags.view3dAs2dArray == 0)
1806         {
1807             // ADDR3_256B_2D can't support 3D images.
1808             pOut->validModes.sw2d256B = 1;
1809         }
1810         pOut->validModes.sw2d4kB   = 1;
1811         pOut->validModes.sw2d64kB  = 1;
1812         pOut->validModes.sw2d256kB = 1;
1813     }
1814     else if (IsTex3d(pIn->resourceType))
1815     {
1816         // An eventual determination would be based on pal setting of height_watermark and depth_watermark.
1817         // However, we just adopt the simpler logic currently.
1818         // For 3D images w/ view3dAs2dArray = 0, SW_3D is preferred.
1819         // For 3D images w/ view3dAs2dArray = 1, it should go to 2D path above.
1820         // Enable linear since client may force linear tiling for 3D texture that does not set view3dAs2dArray.
1821         pOut->validModes.swLinear  = 1;
1822         pOut->validModes.sw3d4kB   = 1;
1823         pOut->validModes.sw3d64kB  = 1;
1824         pOut->validModes.sw3d256kB = 1;
1825     }
1826 
1827     // If client specifies a max alignment, remove swizzles that require alignment beyond it.
1828     if (pIn->maxAlign != 0)
1829     {
1830         if (pIn->maxAlign < Size256K)
1831         {
1832             pOut->validModes.value &= ~Blk256KBSwModeMask;
1833         }
1834 
1835         if (pIn->maxAlign < Size64K)
1836         {
1837             pOut->validModes.value &= ~Blk64KBSwModeMask;
1838         }
1839 
1840         if (pIn->maxAlign < Size4K)
1841         {
1842             pOut->validModes.value &= ~Blk4KBSwModeMask;
1843         }
1844 
1845         if (pIn->maxAlign < Size256)
1846         {
1847             pOut->validModes.value &= ~Blk256BSwModeMask;
1848         }
1849     }
1850 
1851     return returnCode;
1852 }
1853 
1854 /**
1855 ************************************************************************************************************************
1856 *   Gfx12Lib::HwlComputeStereoInfo
1857 *
1858 *   @brief
1859 *       Compute height alignment and right eye pipeBankXor for stereo surface
1860 *
1861 *   @return
1862 *       Error code
1863 *
1864 ************************************************************************************************************************
1865 */
HwlComputeStereoInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const1866 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
1867     const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
1868     UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
1869     UINT_32*                                pRightXor   ///< Right eye xor
1870     ) const
1871 {
1872     ADDR_E_RETURNCODE ret = ADDR_OK;
1873 
1874     *pRightXor = 0;
1875 
1876     const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
1877     const UINT_32 samplesLog2 = Log2(pIn->numSamples);
1878     const UINT_32 eqIndex     = GetEquationTableEntry(pIn->swizzleMode, samplesLog2, elemLog2);
1879 
1880     if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
1881     {
1882         const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
1883 
1884         UINT_32 yMax     = 0;
1885         UINT_32 yPosMask = 0;
1886 
1887         // First get "max y bit"
1888         for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
1889         {
1890             ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
1891 
1892             if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
1893                 (m_equationTable[eqIndex].addr[i].index > yMax))
1894             {
1895                 yMax = m_equationTable[eqIndex].addr[i].index;
1896             }
1897         }
1898 
1899         // Then loop again for populating a position mask of "max Y bit"
1900         for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
1901         {
1902             if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
1903                 (m_equationTable[eqIndex].addr[i].index == yMax))
1904             {
1905                 yPosMask |= 1u << i;
1906             }
1907         }
1908 
1909         const UINT_32 additionalAlign = 1 << yMax;
1910 
1911         if (additionalAlign >= *pAlignY)
1912         {
1913             *pAlignY = additionalAlign;
1914 
1915             const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
1916 
1917             if ((alignedHeight >> yMax) & 1)
1918             {
1919                 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
1920             }
1921         }
1922     }
1923     else
1924     {
1925         ret = ADDR_INVALIDPARAMS;
1926     }
1927 
1928     return ret;
1929 }
1930 
1931 /**
1932 ************************************************************************************************************************
1933 *   Gfx12Lib::HwlValidateNonSwModeParams
1934 *
1935 *   @brief
1936 *       Validate compute surface info params except swizzle mode
1937 *
1938 *   @return
1939 *       TRUE if parameters are valid, FALSE otherwise
1940 ************************************************************************************************************************
1941 */
HwlValidateNonSwModeParams(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn) const1942 BOOL_32 Gfx12Lib::HwlValidateNonSwModeParams(
1943     const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn
1944     ) const
1945 {
1946     const ADDR3_SURFACE_FLAGS flags     = pIn->flags;
1947     const AddrResourceType    rsrcType  = pIn->resourceType;
1948     const BOOL_32             isVrs     = flags.isVrsImage;
1949     const BOOL_32             isStereo  = flags.qbStereo;
1950     const BOOL_32             isDisplay = flags.display;
1951     const BOOL_32             isMipmap  = (pIn->numMipLevels > 1);
1952     const BOOL_32             isMsaa    = (pIn->numSamples > 1);
1953     const UINT_32             bpp       = pIn->bpp;
1954 
1955     BOOL_32                   valid     = TRUE;
1956     if ((bpp == 0) || (bpp > 128) || (pIn->width == 0) || (pIn->numSamples > 8))
1957     {
1958         ADDR_ASSERT_ALWAYS();
1959         valid = FALSE;
1960     }
1961 
1962     // Resource type check
1963     if (IsTex1d(rsrcType))
1964     {
1965         if (isMsaa || isStereo || isVrs || isDisplay)
1966         {
1967             ADDR_ASSERT_ALWAYS();
1968             valid = FALSE;
1969         }
1970     }
1971     else if (IsTex2d(rsrcType))
1972     {
1973         if ((isMsaa && isMipmap) || (isStereo && isMsaa) || (isStereo && isMipmap) ||
1974             // VRS surface needs to be 8BPP format
1975             (isVrs && (bpp != 8)))
1976         {
1977             ADDR_ASSERT_ALWAYS();
1978             valid = FALSE;
1979         }
1980     }
1981     else if (IsTex3d(rsrcType))
1982     {
1983         if (isMsaa || isStereo || isVrs || isDisplay)
1984         {
1985             ADDR_ASSERT_ALWAYS();
1986             valid = FALSE;
1987         }
1988     }
1989     else
1990     {
1991         // An invalid resource type that is not 1D, 2D or 3D.
1992         ADDR_ASSERT_ALWAYS();
1993         valid = FALSE;
1994     }
1995 
1996     return valid;
1997 }
1998 
1999 } // V3
2000 } // Addr
2001