1 /*
2 ************************************************************************************************************************
3 *
4 * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
5 * SPDX-License-Identifier: MIT
6 *
7 ***********************************************************************************************************************/
8
9 /**
10 ************************************************************************************************************************
11 * @file gfx12addrlib.cpp
12 * @brief Contain the implementation for the Gfx12Lib class.
13 ************************************************************************************************************************
14 */
15
16 #include "gfx12addrlib.h"
17 #include "gfx12_gb_reg.h"
18
19 #include "amdgpu_asic_addr.h"
20
21 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
22 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
23
24 namespace Addr
25 {
/**
************************************************************************************************************************
*   Gfx12HwlInit
*
*   @brief
*       Creates a Gfx12Lib object.
*
*   @return
*       Returns a Gfx12Lib object pointer.
************************************************************************************************************************
*/
Gfx12HwlInit(const Client * pClient)37 Addr::Lib* Gfx12HwlInit(
38 const Client* pClient)
39 {
40 return V3::Gfx12Lib::CreateObj(pClient);
41 }
42
43 namespace V3
44 {
45
46 ////////////////////////////////////////////////////////////////////////////////////////////////////
47 // Static Const Member
48 ////////////////////////////////////////////////////////////////////////////////////////////////////
49 const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] =
50 {//Linear 2d 3d 256B 4KB 64KB 256KB Reserved
51 {{1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR3_LINEAR
52 {{0, 1, 0, 1, 0, 0, 0, 0}}, // ADDR3_256B_2D
53 {{0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR3_4KB_2D
54 {{0, 1, 0, 0, 0, 1, 0, 0}}, // ADDR3_64KB_2D
55 {{0, 1, 0, 0, 0, 0, 1, 0}}, // ADDR3_256KB_2D
56 {{0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR3_4KB_3D
57 {{0, 0, 1, 0, 0, 1, 0, 0}}, // ADDR3_64KB_3D
58 {{0, 0, 1, 0, 0, 0, 1, 0}}, // ADDR3_256KB_3D
59 };
60
61 /**
62 ************************************************************************************************************************
63 * Gfx12Lib::Gfx12Lib
64 *
65 * @brief
66 * Constructor
67 *
68 ************************************************************************************************************************
69 */
Gfx12Lib(const Client * pClient)70 Gfx12Lib::Gfx12Lib(
71 const Client* pClient)
72 :
73 Lib(pClient),
74 m_numSwizzleBits(0)
75 {
76 memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));
77 }
78
79 /**
80 ************************************************************************************************************************
81 * Gfx12Lib::~Gfx12Lib
82 *
83 * @brief
84 * Destructor
85 ************************************************************************************************************************
86 */
~Gfx12Lib()87 Gfx12Lib::~Gfx12Lib()
88 {
89 }
90
91 /**
92 ************************************************************************************************************************
93 * Gfx12Lib::ConvertSwizzlePatternToEquation
94 *
95 * @brief
96 * Convert swizzle pattern to equation.
97 *
98 * @return
99 * N/A
100 ************************************************************************************************************************
101 */
ConvertSwizzlePatternToEquation(UINT_32 elemLog2,Addr3SwizzleMode swMode,const ADDR_SW_PATINFO * pPatInfo,ADDR_EQUATION * pEquation) const102 VOID Gfx12Lib::ConvertSwizzlePatternToEquation(
103 UINT_32 elemLog2, ///< [in] element bytes log2
104 Addr3SwizzleMode swMode, ///< [in] swizzle mode
105 const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern info
106 ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
107 const
108 {
109 ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K];
110 GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
111
112 const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
113 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode, TRUE);
114
115 pEquation->numBits = blockSizeLog2;
116 pEquation->stackedDepthSlices = FALSE;
117
118 for (UINT_32 i = 0; i < elemLog2; i++)
119 {
120 pEquation->addr[i].channel = 0;
121 pEquation->addr[i].valid = 1;
122 pEquation->addr[i].index = i;
123 }
124
125 for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
126 {
127 ADDR_ASSERT(IsPow2(pSwizzle[i].value));
128
129 if (pSwizzle[i].x != 0)
130 {
131 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
132
133 pEquation->addr[i].channel = 0;
134 pEquation->addr[i].valid = 1;
135 pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
136 }
137 else if (pSwizzle[i].y != 0)
138 {
139 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
140
141 pEquation->addr[i].channel = 1;
142 pEquation->addr[i].valid = 1;
143 pEquation->addr[i].index = Log2(pSwizzle[i].y);
144 }
145 else if (pSwizzle[i].z != 0)
146 {
147 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
148
149 pEquation->addr[i].channel = 2;
150 pEquation->addr[i].valid = 1;
151 pEquation->addr[i].index = Log2(pSwizzle[i].z);
152 }
153 else if (pSwizzle[i].s != 0)
154 {
155 ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].s)));
156
157 pEquation->addr[i].channel = 3;
158 pEquation->addr[i].valid = 1;
159 pEquation->addr[i].index = Log2(pSwizzle[i].s);
160 }
161 else
162 {
163 ADDR_ASSERT_ALWAYS();
164 }
165 }
166 }
167
168 /**
169 ************************************************************************************************************************
170 * Gfx12Lib::InitEquationTable
171 *
172 * @brief
173 * Initialize Equation table.
174 *
175 * @return
176 * N/A
177 ************************************************************************************************************************
178 */
InitEquationTable()179 VOID Gfx12Lib::InitEquationTable()
180 {
181 memset(m_equationTable, 0, sizeof(m_equationTable));
182
183 for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
184 {
185 const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
186
187 // Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits)
188 if (IsValidSwMode(swMode) && (IsLinear(swMode) == false))
189 {
190 const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
191
192 for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
193 {
194 for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
195 {
196 UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
197 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, elemLog2, 1 << msaaIdx);
198
199 if (pPatInfo != NULL)
200 {
201 ADDR_EQUATION equation = {};
202
203 ConvertSwizzlePatternToEquation(elemLog2, swMode, pPatInfo, &equation);
204
205 equationIndex = m_numEquations;
206 ADDR_ASSERT(equationIndex < NumSwizzlePatterns);
207
208 m_equationTable[equationIndex] = equation;
209 m_numEquations++;
210 }
211 SetEquationTableEntry(swMode, msaaIdx, elemLog2, equationIndex);
212 } // loop through bpp sizes
213 } // loop through MSAA rates
214 } // End check for valid non-linear modes
215 } // loop through swizzle modes
216 }
217
218 /**
219 ************************************************************************************************************************
220 * Gfx12Lib::HwlGetEquationIndex
221 *
222 * @brief
223 * Return equationIndex by surface info input
224 *
225 * @return
226 * equationIndex
227 ************************************************************************************************************************
228 */
HwlGetEquationIndex(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const229 UINT_32 Gfx12Lib::HwlGetEquationIndex(
230 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure
231 ) const
232 {
233 UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;
234
235 if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||
236 (pIn->resourceType == ADDR_RSRC_TEX_3D))
237 {
238 equationIdx = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), Log2(pIn->bpp >> 3));
239 }
240
241 return equationIdx;
242 }
243
244 /**
245 ************************************************************************************************************************
246 * Gfx12Lib::InitBlockDimensionTable
247 *
248 * @brief
249 * Initialize block dimension table for all swizzle modes + msaa samples + bpp bundles.
250 *
251 * @return
252 * N/A
253 ************************************************************************************************************************
254 */
InitBlockDimensionTable()255 VOID Gfx12Lib::InitBlockDimensionTable()
256 {
257 memset(m_blockDimensionTable, 0, sizeof(m_blockDimensionTable));
258
259 ADDR3_COMPUTE_SURFACE_INFO_INPUT surfaceInfo {};
260
261
262 for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++)
263 {
264 const Addr3SwizzleMode swMode = static_cast<Addr3SwizzleMode>(swModeIdx);
265
266 if (IsValidSwMode(swMode))
267 {
268 surfaceInfo.swizzleMode = swMode;
269 const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? MaxMsaaRateLog2 : 1;
270
271 for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++)
272 {
273 surfaceInfo.numSamples = (1u << msaaIdx);
274 for (UINT_32 elementBytesLog2 = 0; elementBytesLog2 < MaxElementBytesLog2; elementBytesLog2++)
275 {
276 surfaceInfo.bpp = (1u << (elementBytesLog2 + 3));
277 ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &surfaceInfo };
278 ComputeBlockDimensionForSurf(&input, &m_blockDimensionTable[swModeIdx][msaaIdx][elementBytesLog2]);
279 } // end loop through bpp sizes
280 } // end loop through MSAA rates
281 } // end check for valid swizzle modes
282 } // end loop through swizzle modes
283 }
284
/**
************************************************************************************************************************
*   Gfx12Lib::GetMipOrigin
*
*   @brief
*       Internal function to calculate origins of the mip levels
*
*   @return
*       N/A
************************************************************************************************************************
*/
GetMipOrigin(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR_EXTENT3D & mipExtentFirstInTail,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const296 VOID Gfx12Lib::GetMipOrigin(
297 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn, ///< [in] input structure
298 const ADDR_EXTENT3D& mipExtentFirstInTail,
299 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
300 ) const
301 {
302 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
303 const BOOL_32 is3d = (pSurfInfo->resourceType == ADDR_RSRC_TEX_3D);
304 const UINT_32 bytesPerPixel = pSurfInfo->bpp >> 3;
305 const UINT_32 elementBytesLog2 = Log2(bytesPerPixel);
306 const UINT_32 samplesLog2 = Log2(pSurfInfo->numSamples);
307
308 // Calculate the width/height/depth for the given microblock, because the mip offset calculation
309 // is in units of microblocks but we want it in elements.
310 ADDR_EXTENT3D microBlockExtent = HwlGetMicroBlockSize(pIn);
311 const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn, pOut->blockExtent);
312 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
313
314 UINT_32 pitch = tailMaxDim.width;
315 UINT_32 height = tailMaxDim.height;
316 UINT_32 depth = (is3d ? PowTwoAlign(mipExtentFirstInTail.depth, microBlockExtent.depth) : 1);
317
318 const UINT_32 tailMaxDepth = (is3d ? (depth / microBlockExtent.depth) : 1);
319
320 for (UINT_32 i = pOut->firstMipIdInTail; i < pSurfInfo->numMipLevels; i++)
321 {
322 const INT_32 mipInTail = CalcMipInTail(pIn, pOut, i);
323 const UINT_32 mipOffset = CalcMipOffset(pIn, mipInTail);
324
325 pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;
326 pOut->pMipInfo[i].mipTailOffset = mipOffset;
327 pOut->pMipInfo[i].macroBlockOffset = 0;
328
329 pOut->pMipInfo[i].pitch = pitch;
330 pOut->pMipInfo[i].height = height;
331 pOut->pMipInfo[i].depth = depth;
332 if (IsLinear(pSurfInfo->swizzleMode))
333 {
334 pOut->pMipInfo[i].mipTailCoordX = mipOffset >> 8;
335 pOut->pMipInfo[i].mipTailCoordY = 0;
336 pOut->pMipInfo[i].mipTailCoordZ = 0;
337 }
338 else
339 {
340 UINT_32 mipX = ((mipOffset >> 9) & 1) |
341 ((mipOffset >> 10) & 2) |
342 ((mipOffset >> 11) & 4) |
343 ((mipOffset >> 12) & 8) |
344 ((mipOffset >> 13) & 16) |
345 ((mipOffset >> 14) & 32);
346 UINT_32 mipY = ((mipOffset >> 8) & 1) |
347 ((mipOffset >> 9) & 2) |
348 ((mipOffset >> 10) & 4) |
349 ((mipOffset >> 11) & 8) |
350 ((mipOffset >> 12) & 16) |
351 ((mipOffset >> 13) & 32);
352
353 pOut->pMipInfo[i].mipTailCoordX = mipX * microBlockExtent.width;
354 pOut->pMipInfo[i].mipTailCoordY = mipY * microBlockExtent.height;
355 pOut->pMipInfo[i].mipTailCoordZ = 0;
356 }
357 if (IsLinear(pSurfInfo->swizzleMode))
358 {
359 pitch = Max(pitch >> 1, 1u);
360 }
361 else
362 {
363 pOut->pMipInfo[i].pitch = PowTwoAlign(pitch, microBlockExtent.width);
364 pOut->pMipInfo[i].height = PowTwoAlign(height, microBlockExtent.height);
365 pOut->pMipInfo[i].depth = PowTwoAlign(depth, microBlockExtent.depth);
366 pitch = Max(pitch >> 1, 1u);
367 height = Max(height >> 1, 1u);
368 depth = Max(depth >> 1, 1u);
369 }
370 }
371 }
372
/**
************************************************************************************************************************
*   Gfx12Lib::GetMipOffset
*
*   @brief
*       Internal function to calculate the slice size, surface size and per-mip offsets for a surface
*
*   @return
*       N/A
************************************************************************************************************************
*/
GetMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const384 VOID Gfx12Lib::GetMipOffset(
385 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn, ///< [in] input structure
386 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
387 ) const
388 {
389 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
390 const UINT_32 bytesPerPixel = pSurfInfo->bpp >> 3;
391 const UINT_32 elementBytesLog2 = Log2(bytesPerPixel);
392 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
393 const UINT_32 blockSize = 1 << blockSizeLog2;
394 const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn, pOut->blockExtent);;
395 const ADDR_EXTENT3D mip0Dims = GetBaseMipExtents(pSurfInfo);
396 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
397 const bool isLinear = IsLinear(pSurfInfo->swizzleMode);
398
399 UINT_32 firstMipInTail = pSurfInfo->numMipLevels;
400 UINT_64 mipChainSliceSize = 0;
401 UINT_64 mipChainSliceSizeDense = 0;
402 UINT_64 mipSize[MaxMipLevels];
403 UINT_64 mipSliceSize[MaxMipLevels];
404
405 const BOOL_32 useCustomPitch = UseCustomPitch(pSurfInfo);
406 for (UINT_32 mipIdx = 0; mipIdx < pSurfInfo->numMipLevels; mipIdx++)
407 {
408 const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx);
409
410 if (Lib::SupportsMipTail(pSurfInfo->swizzleMode) &&
411 (pSurfInfo->numMipLevels > 1) &&
412 IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
413 {
414 firstMipInTail = mipIdx;
415 mipChainSliceSize += blockSize / pOut->blockExtent.depth;
416 mipChainSliceSizeDense += blockSize / pOut->blockExtent.depth;
417 break;
418 }
419 else
420 {
421 UINT_32 pitchImgData = 0u;
422 UINT_32 pitchSliceSize = 0u;
423 if (isLinear)
424 {
425 // The slice size of a linear image is calculated as if the "pitch" is 256 byte aligned.
426 // However, the rendering pitch is aligned to 128 bytes, and that is what needs to be reported
427 // to our clients in the normal 'pitch' field.
428 // Note this is NOT the same as the total size of the image being aligned to 256 bytes!
429 pitchImgData = (useCustomPitch ? pOut->pitch : PowTwoAlign(mipExtents.width, 128u / bytesPerPixel));
430 pitchSliceSize = PowTwoAlign(pitchImgData, blockSize / bytesPerPixel);
431 }
432 else
433 {
434 pitchImgData = PowTwoAlign(mipExtents.width, pOut->blockExtent.width);
435 pitchSliceSize = pitchImgData;
436 }
437
438 UINT_32 height = UseCustomHeight(pSurfInfo)
439 ? pOut->height
440 : PowTwoAlign(mipExtents.height, pOut->blockExtent.height);
441 const UINT_32 depth = PowTwoAlign(mipExtents.depth, pOut->blockExtent.depth);
442
443 if (isLinear && pSurfInfo->flags.denseSliceExact && ((pitchImgData % blockSize) != 0))
444 {
445 // If we want size to exactly equal (data)pitch * height, make sure that value is 256B aligned.
446 // Essentially, if the pitch is less aligned, ensure the height is padded so total alignment is 256B.
447 ADDR_ASSERT((blockSize % 128) == 0);
448 height = PowTwoAlign(height, blockSize / 128u);
449 }
450
451 // The original "blockExtent" calculation does subtraction of logs (i.e., division) to get the
452 // sizes. We aligned our pitch and height to those sizes, which means we need to multiply the various
453 // factors back together to get back to the slice size.
454 UINT_64 sizeExceptPitch = static_cast<UINT_64>(height) * pSurfInfo->numSamples * (pSurfInfo->bpp >> 3);
455 UINT_64 sliceSize = static_cast<UINT_64>(pitchSliceSize) * sizeExceptPitch;
456 UINT_64 sliceDataSize = PowTwoAlign(static_cast<UINT_64>(pitchImgData) * sizeExceptPitch,
457 static_cast<UINT_64>(blockSize));
458
459 UINT_64 hwSliceSize = sliceSize * pOut->blockExtent.depth;
460 ADDR_ASSERT(PowTwoAlign(hwSliceSize, static_cast<UINT_64>(blockSize)) == hwSliceSize);
461
462 if ((mipIdx == 0) && CanTrimLinearPadding(pSurfInfo))
463 {
464 // When this is the last linear subresource of the whole image (as laid out in memory), then we don't
465 // need to worry about the real slice size and can reduce it to the end of the image data (or some
466 // inflated value to meet a custom depth pitch)
467 pitchSliceSize = pitchImgData;
468 if (UseCustomHeight(pSurfInfo))
469 {
470 sliceSize = pSurfInfo->sliceAlign;
471 }
472 else
473 {
474 sliceSize = sliceDataSize;
475 }
476 // CanTrimLinearPadding is always false for 3D swizzles, so block depth is always 1.
477 hwSliceSize = sliceSize;
478 }
479
480 mipSize[mipIdx] = sliceSize * depth;
481 mipSliceSize[mipIdx] = hwSliceSize;
482 mipChainSliceSize += sliceSize;
483 mipChainSliceSizeDense += (mipIdx == 0) ? sliceDataSize : sliceSize;
484
485 if (pOut->pMipInfo != NULL)
486 {
487 pOut->pMipInfo[mipIdx].pitch = pitchImgData;
488 pOut->pMipInfo[mipIdx].pitchForSlice = pitchSliceSize;
489 pOut->pMipInfo[mipIdx].height = height;
490 pOut->pMipInfo[mipIdx].depth = depth;
491 }
492 }
493 }
494
495 pOut->sliceSize = mipChainSliceSize;
496 pOut->sliceSizeDensePacked = mipChainSliceSizeDense;
497 pOut->surfSize = mipChainSliceSize * pOut->numSlices;
498 pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;
499 pOut->firstMipIdInTail = firstMipInTail;
500
501 if (pOut->pMipInfo != NULL)
502 {
503 if (isLinear)
504 {
505 // 1. Linear swizzle mode doesn't have miptails.
506 // 2. The organization of linear 3D mipmap resource is same as GFX11, we should use mip slice size to
507 // caculate mip offset.
508 ADDR_ASSERT(firstMipInTail == pSurfInfo->numMipLevels);
509
510 UINT_64 sliceSize = 0;
511
512 for (INT_32 i = static_cast<INT_32>(pSurfInfo->numMipLevels) - 1; i >= 0; i--)
513 {
514 pOut->pMipInfo[i].offset = sliceSize;
515 pOut->pMipInfo[i].macroBlockOffset = sliceSize;
516 pOut->pMipInfo[i].mipTailOffset = 0;
517
518 sliceSize += mipSliceSize[i];
519 }
520 }
521 else
522 {
523 UINT_64 offset = 0;
524 UINT_64 macroBlkOffset = 0;
525
526 // Even though "firstMipInTail" is zero-based while "numMipLevels" is one-based, from definition of
527 // _ADDR3_COMPUTE_SURFACE_INFO_OUTPUT struct,
528 // UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip
529 // /// in tail, it will be set to number of mip levels
530 // See initialization:
531 // UINT_32 firstMipInTail = pIn->numMipLevels
532 // It is possible that they are equal if
533 // 1. a single mip level image that's larger than the largest mip that would fit in the mip tail if
534 // the mip tail existed
535 // 2. 256B_2D and linear images which don't have miptails from HWAL functionality
536 //
537 // We can use firstMipInTail != pIn->numMipLevels to check it has mip in tails and do mipInfo assignment.
538 if (firstMipInTail != pSurfInfo->numMipLevels)
539 {
540 // Determine the application dimensions of the first mip level that resides in the tail.
541 // This is distinct from "tailMaxDim" which is the maximum size of a mip level that will fit in the
542 // tail.
543 ADDR_EXTENT3D mipExtentFirstInTail = GetMipExtent(mip0Dims, firstMipInTail);
544
545 // For a 2D image, "alignedDepth" is always "1".
546 // For a 3D image, this is effectively the number of application slices associated with the first mip
547 // in the tail (up-aligned to HW requirements).
548 const UINT_32 alignedDepth = PowTwoAlign(mipExtentFirstInTail.depth, pOut->blockExtent.depth);
549
550 // "hwSlices" is the number of HW blocks required to represent the first mip level in the tail.
551 const UINT_32 hwSlices = alignedDepth / pOut->blockExtent.depth;
552
553 // Note that for 3D images that utilize a 2D swizzle mode, there really can be multiple
554 // HW slices that encompass the mip tail; i.e., hwSlices is not necessarily one.
555 // For example, you could have a single mip level 8x8x32 image with a 4KB_2D swizzle mode
556 // The 8x8 region fits into a 4KB block (so it's "in the tail"), but because we have a 2D
557 // swizzle mode (where each slice is its own block, so blockExtent.depth == 1), hwSlices
558 // will now be equivalent to the number of application slices, or 32.
559
560 // Mip tails are stored in "reverse" order -- i.e., the mip-tail itself is stored first, so the
561 // first mip level outside the tail has an offset that's the dimension of the tail itself, or one
562 // swizzle block in size.
563 offset = blockSize * hwSlices;
564 macroBlkOffset = blockSize;
565
566 // And determine the per-mip information for everything inside the mip tail.
567 GetMipOrigin(pIn, mipExtentFirstInTail, pOut);
568 }
569
570 // Again, because mip-levels are stored backwards (smallest first), we start determining mip-level
571 // offsets from the smallest to the largest.
572 // Note that firstMipInTail == 0 immediately terminates the loop, so there is no need to check for this
573 // case.
574 for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
575 {
576 pOut->pMipInfo[i].offset = offset;
577 pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
578 pOut->pMipInfo[i].mipTailOffset = 0;
579
580 offset += mipSize[i];
581 macroBlkOffset += mipSliceSize[i];
582 }
583 }
584 }
585 }
586
/**
************************************************************************************************************************
*   Gfx12Lib::HwlComputeSurfaceInfo
*
*   @brief
*       Internal function to calculate alignment for a surface
*
*   @return
*       ADDR_E_RETURNCODE
************************************************************************************************************************
*/
HwlComputeSurfaceInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfo,ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const598 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo(
599 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo, ///< [in] input structure
600 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
601 ) const
602 {
603 ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ pSurfInfo };
604
605 // Check that only 2D swizzle mode supports MSAA
606 const UINT_32 samplesLog2 = Is2dSwizzle(pSurfInfo->swizzleMode) ? Log2(pSurfInfo->numSamples) : 0;
607
608 // The block dimension width/height/depth is determined only by swizzle mode, MSAA samples and bpp
609 pOut->blockExtent = GetBlockDimensionTableEntry(pSurfInfo->swizzleMode, samplesLog2, Log2(pSurfInfo->bpp >> 3));
610
611 ADDR_E_RETURNCODE returnCode = ApplyCustomizedPitchHeight(pSurfInfo, pOut);
612
613 if (returnCode == ADDR_OK)
614 {
615 pOut->numSlices = PowTwoAlign(pSurfInfo->numSlices, pOut->blockExtent.depth);
616 pOut->baseAlign = 1 << GetBlockSizeLog2(pSurfInfo->swizzleMode);
617
618 GetMipOffset(&input, pOut);
619
620 SanityCheckSurfSize(&input, pOut);
621
622 // Slices must be exact multiples of the block sizes. However:
623 // - with 3D images, one block will contain multiple slices, so that needs to be taken into account.
624 //
625 // Note that with linear images that have only one slice, we can always guarantee pOut->sliceSize is 256B
626 // alignment so there is no need to worry about it.
627 ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) % GetBlockSize(pSurfInfo->swizzleMode)) == 0);
628 }
629
630 return returnCode;
631 }
632
633 /**
634 ************************************************************************************************************************
635 * Gfx12Lib::GetBaseMipExtents
636 *
637 * @brief
638 * Return the size of the base mip level in a nice cozy little structure.
639 *
640 ************************************************************************************************************************
641 */
GetBaseMipExtents(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn) const642 ADDR_EXTENT3D Gfx12Lib::GetBaseMipExtents(
643 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn
644 ) const
645 {
646 return { pIn->width,
647 pIn->height,
648 (IsTex3d(pIn->resourceType) ? pIn->numSlices : 1) }; // slices is depth for 3d
649 }
650
651 /**
652 ************************************************************************************************************************
653 * Gfx12Lib::GetMaxNumMipsInTail
654 *
655 * @brief
656 * Return max number of mips in tails
657 *
658 * @return
659 * Max number of mips in tails
660 ************************************************************************************************************************
661 */
GetMaxNumMipsInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const662 UINT_32 Gfx12Lib::GetMaxNumMipsInTail(
663 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
664 ) const
665 {
666 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
667 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
668
669 UINT_32 effectiveLog2 = blockSizeLog2;
670 UINT_32 mipsInTail = 1;
671
672 if (Is3dSwizzle(pSurfInfo->swizzleMode))
673 {
674 effectiveLog2 -= (blockSizeLog2 - 8) / 3;
675 }
676
677 if (effectiveLog2 > 8)
678 {
679 mipsInTail = (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);
680 }
681
682 return mipsInTail;
683 }
684
/**
************************************************************************************************************************
*   Gfx12Lib::CalcMipInTail
*
*   @brief
*       Internal function to calculate the "mipInTail" parameter.
*
*   @return
*       The magic "mipInTail" parameter.
************************************************************************************************************************
*/
CalcMipInTail(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut,UINT_32 mipLevel) const696 INT_32 Gfx12Lib::CalcMipInTail(
697 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
698 const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
699 UINT_32 mipLevel
700 ) const
701 {
702 const INT_32 firstMipIdInTail = static_cast<INT_32>(pOut->firstMipIdInTail);
703
704 INT_32 mipInTail = 0;
705
706 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
707 mipInTail = static_cast<INT_32>(mipLevel) - firstMipIdInTail;
708 if ((mipInTail < 0) || (pSurfInfo->numMipLevels == 1) || (GetBlockSize(pSurfInfo->swizzleMode) <= 256))
709 {
710 mipInTail = MaxMipLevels;
711 }
712
713 return mipInTail;
714 }
715
/**
************************************************************************************************************************
*   Gfx12Lib::CalcMipOffset
*
*   @brief
*       Internal function to calculate the offset of a mip level within the mip tail.
*
*   @return
*       The offset of the given "mipInTail" level inside the mip tail.
************************************************************************************************************************
*/
CalcMipOffset(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,UINT_32 mipInTail) const726 UINT_32 Gfx12Lib::CalcMipOffset(
727 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
728 UINT_32 mipInTail
729 ) const
730 {
731 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
732
733 const INT_32 signedM = static_cast<INT_32>(maxMipsInTail) - static_cast<INT_32>(1) - mipInTail;
734 const UINT_32 m = Max(0, signedM);
735 const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);
736
737 return mipOffset;
738 }
739
740 /**
741 ************************************************************************************************************************
742 * Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear
743 *
744 * @brief
745 * Internal function to calculate address from coord for linear swizzle surface
746 *
747 * @return
748 * ADDR_E_RETURNCODE
749 ************************************************************************************************************************
750 */
HwlComputeSurfaceAddrFromCoordLinear(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pSurfInfoIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const751 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordLinear(
752 const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
753 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfoIn, ///< [in] input structure
754 ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
755 ) const
756 {
757 ADDR3_MIP_INFO mipInfo[MaxMipLevels];
758 ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels);
759
760 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surfInfoOut = {0};
761 surfInfoOut.size = sizeof(surfInfoOut);
762 surfInfoOut.pMipInfo = mipInfo;
763
764 ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfo(pSurfInfoIn, &surfInfoOut);
765
766 if (returnCode == ADDR_OK)
767 {
768 pOut->addr = (surfInfoOut.sliceSize * pIn->slice) +
769 mipInfo[pIn->mipId].offset +
770 (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3);
771
772 pOut->bitPosition = 0;
773 }
774
775 return returnCode;
776 }
777
778 /**
779 ************************************************************************************************************************
780 * Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled
781 *
782 * @brief
783 * Internal function to calculate address from coord for tiled swizzle surface
784 *
785 * @return
786 * ADDR_E_RETURNCODE
787 ************************************************************************************************************************
788 */
HwlComputeSurfaceAddrFromCoordTiled(const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT * pIn,ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT * pOut) const789 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled(
790 const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
791 ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
792 ) const
793 {
794 // 256B block cannot support 3D image.
795 ADDR_ASSERT((IsTex3d(pIn->resourceType) && IsBlock256b(pIn->swizzleMode)) == FALSE);
796
797 ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {};
798 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
799 ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};
800
801 localIn.size = sizeof(localIn);
802 localIn.flags = pIn->flags;
803 localIn.swizzleMode = pIn->swizzleMode;
804 localIn.resourceType = pIn->resourceType;
805 localIn.format = ADDR_FMT_INVALID;
806 localIn.bpp = pIn->bpp;
807 localIn.width = Max(pIn->unAlignedDims.width, 1u);
808 localIn.height = Max(pIn->unAlignedDims.height, 1u);
809 localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u);
810 localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
811 localIn.numSamples = Max(pIn->numSamples, 1u);
812
813 localOut.size = sizeof(localOut);
814 localOut.pMipInfo = mipInfo;
815 ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT input{ &localIn };
816
817 ADDR_E_RETURNCODE ret = ComputeSurfaceInfo(&localIn, &localOut);
818
819 if (ret == ADDR_OK)
820 {
821 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
822 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
823
824 // Addr3 equation table excludes linear swizzle mode, and fortunately HwlComputeSurfaceAddrFromCoordTiled() is
825 // only called for non-linear swizzle mode.
826 const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(localIn.numSamples), elemLog2);
827
828 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
829 {
830 ADDR3_COORD coords = {};
831
832 // For a 3D image, one swizzle block contains multiple application slices.
833 // For any given image, each HW slice is addressed identically to any other HW slice.
834 // hwSliceSizeBytes is the size of one HW slice; i.e., the number of bytes for the pattern to repeat.
835 // hwSliceId is the index (0, 1, 2...) of the HW slice that an application slice resides in.
836 const UINT_64 hwSliceSizeBytes = localOut.sliceSize * localOut.blockExtent.depth;
837 const UINT_32 hwSliceId = pIn->slice / localOut.blockExtent.depth;
838
839 const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockExtent.width;
840 const UINT_32 yb = pIn->y / localOut.blockExtent.height;
841 const UINT_32 xb = pIn->x / localOut.blockExtent.width;
842 const UINT_64 blkIdx = yb * pb + xb;
843
844 // Technically, the addition of "mipTailCoordX" is only necessary if we're in the mip-tail.
845 // The "mipTailCoordXYZ" values should be zero if we're not in the mip-tail.
846 const BOOL_32 inTail = ((mipInfo[pIn->mipId].mipTailOffset != 0) && (blkSizeLog2 != Log2Size256));
847
848 ADDR_ASSERT((inTail == TRUE) ||
849 // If we're not in the tail, then all of these must be zero.
850 ((mipInfo[pIn->mipId].mipTailCoordX == 0) &&
851 (mipInfo[pIn->mipId].mipTailCoordY == 0) &&
852 (mipInfo[pIn->mipId].mipTailCoordZ == 0)));
853
854 coords.x = pIn->x + mipInfo[pIn->mipId].mipTailCoordX;
855 coords.y = pIn->y + mipInfo[pIn->mipId].mipTailCoordY;
856 coords.z = pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ;
857
858 // Note that in this path, blkIdx does not account for the HW slice ID, so we need to
859 // add it in here.
860 pOut->addr = hwSliceSizeBytes * hwSliceId;
861
862 const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
863 coords.x << elemLog2,
864 coords.y,
865 coords.z,
866 pIn->sample);
867
868 pOut->addr += mipInfo[pIn->mipId].macroBlockOffset +
869 (blkIdx << blkSizeLog2) +
870 blkOffset;
871
872 ADDR_ASSERT(pOut->addr < localOut.surfSize);
873 }
874 else
875 {
876 ret = ADDR_INVALIDPARAMS;
877 }
878 }
879
880 return ret;
881 }
882
883 /**
884 ************************************************************************************************************************
885 * Gfx12Lib::HwlComputePipeBankXor
886 *
887 * @brief
888 * Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address
889 *
890 * @return
891 * PipeBankXor value
892 ************************************************************************************************************************
893 */
HwlComputePipeBankXor(const ADDR3_COMPUTE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT * pOut) const894 ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor(
895 const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
896 ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
897 ) const
898 {
899 if ((m_numSwizzleBits != 0) && // does this configuration support swizzling
900 // base address XOR in GFX12 will be applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes,
901 // Note that Linear and 256B are excluded.
902 (IsLinear(pIn->swizzleMode) == FALSE) &&
903 (IsBlock256b(pIn->swizzleMode) == FALSE))
904 {
905 pOut->pipeBankXor = pIn->surfIndex % (1 << m_numSwizzleBits);
906 }
907 else
908 {
909 pOut->pipeBankXor = 0;
910 }
911
912 return ADDR_OK;
913 }
914
915 /**
916 ************************************************************************************************************************
917 * Gfx12Lib::ComputeOffsetFromEquation
918 *
919 * @brief
920 * Compute offset from equation
921 *
922 * @return
923 * Offset
924 ************************************************************************************************************************
925 */
ComputeOffsetFromEquation(const ADDR_EQUATION * pEq,UINT_32 x,UINT_32 y,UINT_32 z,UINT_32 s) const926 UINT_32 Gfx12Lib::ComputeOffsetFromEquation(
927 const ADDR_EQUATION* pEq, ///< Equation
928 UINT_32 x, ///< x coord in bytes
929 UINT_32 y, ///< y coord in pixel
930 UINT_32 z, ///< z coord in slice
931 UINT_32 s ///< MSAA sample index
932 ) const
933 {
934 UINT_32 offset = 0;
935
936 for (UINT_32 i = 0; i < pEq->numBits; i++)
937 {
938 UINT_32 v = 0;
939
940 if (pEq->addr[i].valid)
941 {
942 if (pEq->addr[i].channel == 0)
943 {
944 v ^= (x >> pEq->addr[i].index) & 1;
945 }
946 else if (pEq->addr[i].channel == 1)
947 {
948 v ^= (y >> pEq->addr[i].index) & 1;
949 }
950 else if (pEq->addr[i].channel == 2)
951 {
952 v ^= (z >> pEq->addr[i].index) & 1;
953 }
954 else if (pEq->addr[i].channel == 3)
955 {
956 v ^= (s >> pEq->addr[i].index) & 1;
957 }
958 else
959 {
960 ADDR_ASSERT_ALWAYS();
961 }
962 }
963
964 offset |= (v << i);
965 }
966
967 return offset;
968 }
969
970 /**
971 ************************************************************************************************************************
972 * Gfx12Lib::GetSwizzlePatternInfo
973 *
974 * @brief
975 * Get swizzle pattern
976 *
977 * @return
978 * Swizzle pattern information
979 ************************************************************************************************************************
980 */
GetSwizzlePatternInfo(Addr3SwizzleMode swizzleMode,UINT_32 elemLog2,UINT_32 numFrag) const981 const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo(
982 Addr3SwizzleMode swizzleMode, ///< Swizzle mode
983 UINT_32 elemLog2, ///< Element size in bytes log2
984 UINT_32 numFrag ///< Number of fragment
985 ) const
986 {
987 const ADDR_SW_PATINFO* patInfo = NULL;
988
989 if (Is2dSwizzle(swizzleMode) == FALSE)
990 {
991 ADDR_ASSERT(numFrag == 1);
992 }
993
994 switch (swizzleMode)
995 {
996 case ADDR3_256KB_2D:
997 switch (numFrag)
998 {
999 case 1:
1000 patInfo = GFX12_SW_256KB_2D_1xAA_PATINFO;
1001 break;
1002 case 2:
1003 patInfo = GFX12_SW_256KB_2D_2xAA_PATINFO;
1004 break;
1005 case 4:
1006 patInfo = GFX12_SW_256KB_2D_4xAA_PATINFO;
1007 break;
1008 case 8:
1009 patInfo = GFX12_SW_256KB_2D_8xAA_PATINFO;
1010 break;
1011 default:
1012 ADDR_ASSERT_ALWAYS();
1013 }
1014 break;
1015 case ADDR3_256KB_3D:
1016 patInfo = GFX12_SW_256KB_3D_PATINFO;
1017 break;
1018 case ADDR3_64KB_2D:
1019 switch (numFrag)
1020 {
1021 case 1:
1022 patInfo = GFX12_SW_64KB_2D_1xAA_PATINFO;
1023 break;
1024 case 2:
1025 patInfo = GFX12_SW_64KB_2D_2xAA_PATINFO;
1026 break;
1027 case 4:
1028 patInfo = GFX12_SW_64KB_2D_4xAA_PATINFO;
1029 break;
1030 case 8:
1031 patInfo = GFX12_SW_64KB_2D_8xAA_PATINFO;
1032 break;
1033 default:
1034 ADDR_ASSERT_ALWAYS();
1035 }
1036 break;
1037 case ADDR3_64KB_3D:
1038 patInfo = GFX12_SW_64KB_3D_PATINFO;
1039 break;
1040 case ADDR3_4KB_2D:
1041 switch (numFrag)
1042 {
1043 case 1:
1044 patInfo = GFX12_SW_4KB_2D_1xAA_PATINFO;
1045 break;
1046 case 2:
1047 patInfo = GFX12_SW_4KB_2D_2xAA_PATINFO;
1048 break;
1049 case 4:
1050 patInfo = GFX12_SW_4KB_2D_4xAA_PATINFO;
1051 break;
1052 case 8:
1053 patInfo = GFX12_SW_4KB_2D_8xAA_PATINFO;
1054 break;
1055 default:
1056 ADDR_ASSERT_ALWAYS();
1057 }
1058 break;
1059 case ADDR3_4KB_3D:
1060 patInfo = GFX12_SW_4KB_3D_PATINFO;
1061 break;
1062 case ADDR3_256B_2D:
1063 switch (numFrag)
1064 {
1065 case 1:
1066 patInfo = GFX12_SW_256B_2D_1xAA_PATINFO;
1067 break;
1068 case 2:
1069 patInfo = GFX12_SW_256B_2D_2xAA_PATINFO;
1070 break;
1071 case 4:
1072 patInfo = GFX12_SW_256B_2D_4xAA_PATINFO;
1073 break;
1074 case 8:
1075 patInfo = GFX12_SW_256B_2D_8xAA_PATINFO;
1076 break;
1077 default:
1078 break;
1079 }
1080 break;
1081 default:
1082 ADDR_ASSERT_ALWAYS();
1083 break;
1084 }
1085
1086 return (patInfo != NULL) ? &patInfo[elemLog2] : NULL;
1087 }
1088 /**
1089 ************************************************************************************************************************
1090 * Gfx12Lib::HwlInitGlobalParams
1091 *
1092 * @brief
1093 * Initializes global parameters
1094 *
1095 * @return
1096 * TRUE if all settings are valid
1097 *
1098 ************************************************************************************************************************
1099 */
HwlInitGlobalParams(const ADDR_CREATE_INPUT * pCreateIn)1100 BOOL_32 Gfx12Lib::HwlInitGlobalParams(
1101 const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
1102 {
1103 BOOL_32 valid = TRUE;
1104 GB_ADDR_CONFIG_GFX12 gbAddrConfig;
1105
1106 gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;
1107
1108 switch (gbAddrConfig.bits.NUM_PIPES)
1109 {
1110 case ADDR_CONFIG_1_PIPE:
1111 m_pipesLog2 = 0;
1112 break;
1113 case ADDR_CONFIG_2_PIPE:
1114 m_pipesLog2 = 1;
1115 break;
1116 case ADDR_CONFIG_4_PIPE:
1117 m_pipesLog2 = 2;
1118 break;
1119 case ADDR_CONFIG_8_PIPE:
1120 m_pipesLog2 = 3;
1121 break;
1122 case ADDR_CONFIG_16_PIPE:
1123 m_pipesLog2 = 4;
1124 break;
1125 case ADDR_CONFIG_32_PIPE:
1126 m_pipesLog2 = 5;
1127 break;
1128 case ADDR_CONFIG_64_PIPE:
1129 m_pipesLog2 = 6;
1130 break;
1131 default:
1132 ADDR_ASSERT_ALWAYS();
1133 valid = FALSE;
1134 break;
1135 }
1136
1137 switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)
1138 {
1139 case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
1140 m_pipeInterleaveLog2 = 8;
1141 break;
1142 case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
1143 m_pipeInterleaveLog2 = 9;
1144 break;
1145 case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
1146 m_pipeInterleaveLog2 = 10;
1147 break;
1148 case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
1149 m_pipeInterleaveLog2 = 11;
1150 break;
1151 default:
1152 ADDR_ASSERT_ALWAYS();
1153 valid = FALSE;
1154 break;
1155 }
1156
1157 m_numSwizzleBits = ((m_pipesLog2 >= 3) ? m_pipesLog2 - 2 : 0);
1158
1159 if (valid)
1160 {
1161 InitEquationTable();
1162 InitBlockDimensionTable();
1163 }
1164
1165 return valid;
1166 }
1167
1168 /**
1169 ************************************************************************************************************************
1170 * Gfx12Lib::HwlComputeNonBlockCompressedView
1171 *
1172 * @brief
1173 * Compute non-block-compressed view for a given mipmap level/slice.
1174 *
1175 * @return
1176 * ADDR_E_RETURNCODE
1177 ************************************************************************************************************************
1178 */
HwlComputeNonBlockCompressedView(const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT * pIn,ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT * pOut) const1179 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeNonBlockCompressedView(
1180 const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure
1181 ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure
1182 ) const
1183 {
1184 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1185
1186 if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) &&
1187 ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
1188 {
1189 // Only support BC1~BC7, ASTC, or ETC2 for now...
1190 returnCode = ADDR_NOTSUPPORTED;
1191 }
1192 else
1193 {
1194 UINT_32 bcWidth, bcHeight;
1195 const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);
1196
1197 ADDR3_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
1198 infoIn.size = sizeof(infoIn);
1199 infoIn.flags = pIn->flags;
1200 infoIn.swizzleMode = pIn->swizzleMode;
1201 infoIn.resourceType = pIn->resourceType;
1202 infoIn.format = pIn->format;
1203 infoIn.bpp = bpp;
1204 infoIn.width = RoundUpQuotient(pIn->unAlignedDims.width, bcWidth);
1205 infoIn.height = RoundUpQuotient(pIn->unAlignedDims.height, bcHeight);
1206 infoIn.numSlices = pIn->unAlignedDims.depth;
1207 infoIn.numMipLevels = pIn->numMipLevels;
1208 infoIn.numSamples = 1;
1209
1210 ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {};
1211
1212 ADDR3_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
1213 infoOut.size = sizeof(infoOut);
1214 infoOut.pMipInfo = mipInfo;
1215
1216 returnCode = HwlComputeSurfaceInfo(&infoIn, &infoOut);
1217
1218 if (returnCode == ADDR_OK)
1219 {
1220 ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
1221 subOffIn.size = sizeof(subOffIn);
1222 subOffIn.swizzleMode = infoIn.swizzleMode;
1223 subOffIn.resourceType = infoIn.resourceType;
1224 subOffIn.pipeBankXor = pIn->pipeBankXor;
1225 subOffIn.slice = pIn->slice;
1226 subOffIn.sliceSize = infoOut.sliceSize;
1227 subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
1228 subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;
1229
1230 ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};
1231 subOffOut.size = sizeof(subOffOut);
1232
1233 // For any mipmap level, move nonBc view base address by offset
1234 HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
1235 pOut->offset = subOffOut.offset;
1236
1237 ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
1238 slicePbXorIn.size = sizeof(slicePbXorIn);
1239 slicePbXorIn.swizzleMode = infoIn.swizzleMode;
1240 slicePbXorIn.resourceType = infoIn.resourceType;
1241 slicePbXorIn.bpe = infoIn.bpp;
1242 slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
1243 slicePbXorIn.slice = pIn->slice;
1244 slicePbXorIn.numSamples = 1;
1245
1246 ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};
1247 slicePbXorOut.size = sizeof(slicePbXorOut);
1248
1249 // For any mipmap level, nonBc view should use computed pbXor
1250 HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
1251 pOut->pipeBankXor = slicePbXorOut.pipeBankXor;
1252
1253 const BOOL_32 tiled = (pIn->swizzleMode != ADDR3_LINEAR);
1254 const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail);
1255 const UINT_32 requestMipWidth =
1256 RoundUpQuotient(Max(pIn->unAlignedDims.width >> pIn->mipId, 1u), bcWidth);
1257 const UINT_32 requestMipHeight =
1258 RoundUpQuotient(Max(pIn->unAlignedDims.height >> pIn->mipId, 1u), bcHeight);
1259
1260 if (inTail)
1261 {
1262 // For mipmap level that is in mip tail block, hack a lot of things...
1263 // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
1264 // are fit in tail block:
1265
1266 // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
1267 pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;
1268
1269 // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
1270 pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);
1271
1272 // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
1273 pOut->unAlignedDims.width = Min(requestMipWidth << pOut->mipId, infoOut.blockExtent.width / 2);
1274
1275 // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
1276 pOut->unAlignedDims.height = Min(requestMipHeight << pOut->mipId, infoOut.blockExtent.height);
1277 }
1278 // This check should cover at least mipId == 0
1279 else if ((requestMipWidth << pIn->mipId) == infoIn.width)
1280 {
1281 // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
1282 // - only one mipmap level and mipId = 0
1283 pOut->mipId = 0;
1284 pOut->numMipLevels = 1;
1285
1286 // (mip0) width = requestMipWidth
1287 pOut->unAlignedDims.width = requestMipWidth;
1288
1289 // (mip0) height = requestMipHeight
1290 pOut->unAlignedDims.height = requestMipHeight;
1291 }
1292 else
1293 {
1294 // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
1295 // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
1296 // because single mip view may have different pitch value than original (multiple) mip view...
1297 // A simple case would be:
1298 // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
1299 // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
1300 // mip0 width = 0x101/mip1 width = 0x80
1301 // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
1302 // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.
1303
1304 // - 2 levels and mipId = 1
1305 pOut->mipId = 1;
1306 pOut->numMipLevels = 2;
1307
1308 const UINT_32 upperMipWidth =
1309 RoundUpQuotient(Max(pIn->unAlignedDims.width >> (pIn->mipId - 1), 1u), bcWidth);
1310 const UINT_32 upperMipHeight =
1311 RoundUpQuotient(Max(pIn->unAlignedDims.height >> (pIn->mipId - 1), 1u), bcHeight);
1312
1313 const BOOL_32 needToAvoidInTail = tiled &&
1314 (requestMipWidth <= infoOut.blockExtent.width / 2) &&
1315 (requestMipHeight <= infoOut.blockExtent.height);
1316
1317 const UINT_32 hwMipWidth =
1318 PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockExtent.width);
1319 const UINT_32 hwMipHeight =
1320 PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockExtent.height);
1321
1322 const BOOL_32 needExtraWidth =
1323 ((upperMipWidth < requestMipWidth * 2) ||
1324 ((upperMipWidth == requestMipWidth * 2) &&
1325 ((needToAvoidInTail == TRUE) ||
1326 (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockExtent.width)))));
1327
1328 const BOOL_32 needExtraHeight =
1329 ((upperMipHeight < requestMipHeight * 2) ||
1330 ((upperMipHeight == requestMipHeight * 2) &&
1331 ((needToAvoidInTail == TRUE) ||
1332 (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockExtent.height)))));
1333
1334 // (mip0) width = requestLastMipLevelWidth
1335 pOut->unAlignedDims.width = upperMipWidth + (needExtraWidth ? 1: 0);
1336
1337 // (mip0) height = requestLastMipLevelHeight
1338 pOut->unAlignedDims.height = upperMipHeight + (needExtraHeight ? 1: 0);
1339 }
1340
1341 // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
1342 ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.width, pOut->mipId) == requestMipWidth);
1343 // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
1344 ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.height, pOut->mipId) == requestMipHeight);
1345 }
1346 }
1347
1348 return returnCode;
1349 }
1350
1351 /**
1352 ************************************************************************************************************************
1353 * Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern
1354 *
1355 * @brief
1356 * Compute sub resource offset to support swizzle pattern
1357 *
1358 * @return
1359 * VOID
1360 ************************************************************************************************************************
1361 */
HwlComputeSubResourceOffsetForSwizzlePattern(const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT * pIn,ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT * pOut) const1362 VOID Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
1363 const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure
1364 ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure
1365 ) const
1366 {
1367 pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;
1368 }
1369
1370 /**
1371 ************************************************************************************************************************
1372 * Gfx12Lib::HwlComputeSlicePipeBankXor
1373 *
1374 * @brief
1375 * Generate slice PipeBankXor value based on base PipeBankXor value and slice id
1376 *
1377 * @return
1378 * PipeBankXor value
1379 ************************************************************************************************************************
1380 */
HwlComputeSlicePipeBankXor(const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT * pIn,ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT * pOut) const1381 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor(
1382 const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure
1383 ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure
1384 ) const
1385 {
1386 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1387
1388 // PipeBankXor is only applied to 4KB, 64KB and 256KB on GFX12.
1389 if ((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE))
1390 {
1391 if (pIn->bpe == 0)
1392 {
1393 // Require a valid bytes-per-element value passed from client...
1394 returnCode = ADDR_INVALIDPARAMS;
1395 }
1396 else
1397 {
1398 const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
1399 Log2(pIn->bpe >> 3),
1400 1);
1401
1402 if (pPatInfo != NULL)
1403 {
1404 const UINT_32 elemLog2 = Log2(pIn->bpe >> 3);
1405
1406 // Addr3 equation table excludes linear swizzle mode, and fortunately when calling
1407 // HwlComputeSlicePipeBankXor the swizzle mode is non-linear, so we don't need to worry about negative
1408 // table index.
1409 const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), elemLog2);
1410
1411 const UINT_32 pipeBankXorOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
1412 0,
1413 0,
1414 pIn->slice,
1415 0);
1416
1417 const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;
1418
1419 // Should have no bit set under pipe interleave
1420 ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);
1421
1422 pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;
1423 }
1424 else
1425 {
1426 // Should never come here...
1427 ADDR_NOT_IMPLEMENTED();
1428
1429 returnCode = ADDR_NOTSUPPORTED;
1430 }
1431 }
1432 }
1433 else
1434 {
1435 pOut->pipeBankXor = 0;
1436 }
1437
1438 return returnCode;
1439 }
1440
1441 /**
1442 ************************************************************************************************************************
1443 * Gfx12Lib::HwlConvertChipFamily
1444 *
1445 * @brief
1446 * Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
1447 * @return
1448 * ChipFamily
1449 ************************************************************************************************************************
1450 */
HwlConvertChipFamily(UINT_32 chipFamily,UINT_32 chipRevision)1451 ChipFamily Gfx12Lib::HwlConvertChipFamily(
1452 UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
1453 UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1454 {
1455 return ADDR_CHIP_FAMILY_NAVI;
1456 }
1457
1458 /**
1459 ************************************************************************************************************************
1460 * Gfx12Lib::SanityCheckSurfSize
1461 *
1462 * @brief
1463 * Calculate the surface size via the exact hardware algorithm to see if it matches.
1464 *
1465 * @return
1466 ************************************************************************************************************************
1467 */
SanityCheckSurfSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT * pOut) const1468 void Gfx12Lib::SanityCheckSurfSize(
1469 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1470 const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut
1471 ) const
1472 {
1473 #if DEBUG
1474 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1475 // Verify that the requested image size is valid for the below algorithm. The below code includes
1476 // implicit assumptions about the surface dimensions being less than "MaxImageDim"; otherwise, it can't
1477 // calculate "firstMipInTail" accurately and the below assertion will trip incorrectly.
1478 //
1479 // Surfaces destined for use only on the SDMA engine can exceed the gfx-engine-imposed limitations of
1480 // the "maximum" image dimensions.
1481 if ((pSurfInfo->width <= MaxImageDim) &&
1482 (pSurfInfo->height <= MaxImageDim) &&
1483 (pSurfInfo->numMipLevels <= MaxMipLevels) &&
1484 (UseCustomPitch(pSurfInfo) == FALSE) &&
1485 (UseCustomHeight(pSurfInfo) == FALSE) &&
1486 // HiZS surfaces have a reduced image size (i.e,. each pixel represents an 8x8 region of the parent
1487 // image, at least for single samples) but they still have the same number of mip levels as the
1488 // parent image. This disconnect produces false assertions below as the image size doesn't apparently
1489 // support the specified number of mip levels.
1490 ((pSurfInfo->flags.hiZHiS == 0) || (pSurfInfo->numMipLevels == 1)))
1491 {
1492 UINT_32 lastMipSize = 1;
1493 UINT_64 dataChainSize = 0;
1494
1495 const ADDR_EXTENT3D mip0Dims = GetBaseMipExtents(pSurfInfo);
1496 const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1497 const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn, pOut->blockExtent);
1498 const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn);
1499
1500 UINT_32 firstMipInTail = 0;
1501 for (INT_32 mipIdx = MaxMipLevels - 1; mipIdx >= 0; mipIdx--)
1502 {
1503 const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx);
1504
1505 if (IsInMipTail(tailMaxDim, mipExtents, maxMipsInTail, pSurfInfo->numMipLevels - mipIdx))
1506 {
1507 firstMipInTail = mipIdx;
1508 }
1509 }
1510
1511 for (INT_32 mipIdx = firstMipInTail - 1; mipIdx >= -1; mipIdx--)
1512 {
1513 if (mipIdx < (static_cast<INT_32>(pSurfInfo->numMipLevels) - 1))
1514 {
1515 dataChainSize += lastMipSize;
1516 }
1517
1518 if (mipIdx >= 0)
1519 {
1520 const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx);
1521 const UINT_32 mipBlockWidth = ShiftCeil(mipExtents.width, Log2(pOut->blockExtent.width));
1522 const UINT_32 mipBlockHeight = ShiftCeil(mipExtents.height, Log2(pOut->blockExtent.height));
1523
1524 lastMipSize = 4 * lastMipSize
1525 - ((mipBlockWidth & 1) ? mipBlockHeight : 0)
1526 - ((mipBlockHeight & 1) ? mipBlockWidth : 0)
1527 - ((mipBlockWidth & mipBlockHeight & 1) ? 1 : 0);
1528 }
1529 }
1530
1531 if (CanTrimLinearPadding(pSurfInfo))
1532 {
1533 ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) <= (dataChainSize << blockSizeLog2));
1534 }
1535 else
1536 {
1537 ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) == (dataChainSize << blockSizeLog2));
1538 }
1539 }
1540 #endif
1541 }
1542
1543 /**
1544 ************************************************************************************************************************
1545 * Gfx12Lib::HwlGetMicroBlockSize
1546 *
1547 * @brief
1548 * Determines the dimensions of a 256B microblock
1549 *
1550 * @return
1551 ************************************************************************************************************************
1552 */
HwlGetMicroBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn) const1553 ADDR_EXTENT3D Gfx12Lib::HwlGetMicroBlockSize(
1554 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn
1555 ) const
1556 {
1557 ADDR_EXTENT3D out = {};
1558 INT_32 widthLog2 = 0;
1559 INT_32 heightLog2 = 0;
1560 INT_32 depthLog2 = 0;
1561 Addr3SwizzleMode swMode = pIn->pSurfInfo->swizzleMode;
1562 UINT_32 bppLog2 = Log2(pIn->pSurfInfo->bpp >> 3);
1563 UINT_32 blockBits = 8 - bppLog2;
1564 if (IsLinear(swMode))
1565 {
1566 widthLog2 = blockBits;
1567 }
1568 else if (Is2dSwizzle(swMode))
1569 {
1570 widthLog2 = (blockBits >> 1) + (blockBits & 1);
1571 heightLog2 = (blockBits >> 1);
1572 }
1573 else
1574 {
1575 ADDR_ASSERT(Is3dSwizzle(swMode));
1576 depthLog2 = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1577 widthLog2 = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1578 heightLog2 = (blockBits / 3);
1579 }
1580 out.width = 1 << widthLog2;
1581 out.height = 1 << heightLog2;
1582 out.depth = 1 << depthLog2;
1583 return out;
1584 }
1585
1586 /**
1587 ************************************************************************************************************************
1588 * Gfx12Lib::HwlCalcBlockSize
1589 *
1590 * @brief
1591 * Determines the extent, in pixels of a swizzle block.
1592 *
1593 * @return
1594 ************************************************************************************************************************
1595 */
HwlCalcBlockSize(const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT * pIn,ADDR_EXTENT3D * pExtent) const1596 VOID Gfx12Lib::HwlCalcBlockSize(
1597 const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
1598 ADDR_EXTENT3D* pExtent
1599 ) const
1600 {
1601 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pSurfInfo = pIn->pSurfInfo;
1602 const UINT_32 log2BlkSize = GetBlockSizeLog2(pSurfInfo->swizzleMode);
1603 const UINT_32 eleBytes = pSurfInfo->bpp >> 3;
1604 const UINT_32 log2EleBytes = Log2(eleBytes);
1605
1606 if (IsLinear(pSurfInfo->swizzleMode))
1607 {
1608 // 1D swizzle mode doesn't support MSAA, so there is no need to consider log2(samples)
1609 pExtent->width = 1 << (log2BlkSize - log2EleBytes);
1610 pExtent->height = 1;
1611 pExtent->depth = 1;
1612 }
1613 else if (Is3dSwizzle(pSurfInfo->swizzleMode))
1614 {
1615 // 3D swizlze mode doesn't support MSAA, so there is no need to consider log2(samples)
1616 const UINT_32 base = (log2BlkSize / 3) - (log2EleBytes / 3);
1617 const UINT_32 log2BlkSizeMod3 = log2BlkSize % 3;
1618 const UINT_32 log2EleBytesMod3 = log2EleBytes % 3;
1619
1620 UINT_32 x = base;
1621 UINT_32 y = base;
1622 UINT_32 z = base;
1623
1624 if (log2BlkSizeMod3 > 0)
1625 {
1626 x++;
1627 }
1628
1629 if (log2BlkSizeMod3 > 1)
1630 {
1631 z++;
1632 }
1633
1634 if (log2EleBytesMod3 > 0)
1635 {
1636 x--;
1637 }
1638
1639 if (log2EleBytesMod3 > 1)
1640 {
1641 z--;
1642 }
1643
1644 pExtent->width = 1u << x;
1645 pExtent->height = 1u << y;
1646 pExtent->depth = 1u << z;
1647 }
1648 else
1649 {
1650 // Only 2D swizzle mode supports MSAA...
1651 // Since for gfx12 MSAA is unconditionally supported by all 2D swizzle modes, we don't need to restrict samples
1652 // to be 1 for ADDR3_256B_2D and ADDR3_4KB_2D as gfx10/11 did.
1653 const UINT_32 log2Samples = Log2(pSurfInfo->numSamples);
1654 const UINT_32 log2Width = (log2BlkSize >> 1) -
1655 (log2EleBytes >> 1) -
1656 (log2Samples >> 1) -
1657 (log2EleBytes & log2Samples & 1);
1658 const UINT_32 log2Height = (log2BlkSize >> 1) -
1659 (log2EleBytes >> 1) -
1660 (log2Samples >> 1) -
1661 ((log2EleBytes | log2Samples) & 1);
1662
1663 // Return the extent in actual units, not log2
1664 pExtent->width = 1u << log2Width;
1665 pExtent->height = 1u << log2Height;
1666 pExtent->depth = 1;
1667 }
1668 }
1669
1670 /**
1671 ************************************************************************************************************************
1672 * Gfx12Lib::HwlGetMipInTailMaxSize
1673 *
1674 * @brief
1675 * Determines the max size of a mip level that fits in the mip-tail.
1676 *
1677 * @return
1678 ************************************************************************************************************************
1679 */
ADDR_EXTENT3D Gfx12Lib::HwlGetMipInTailMaxSize(
    const ADDR3_COMPUTE_SURFACE_INFO_PARAMS_INPUT* pIn,
    const ADDR_EXTENT3D&                           blockDims) const
{
    const Addr3SwizzleMode mode        = pIn->pSurfInfo->swizzleMode;
    const UINT_32          blkSizeLog2 = GetBlockSizeLog2(mode);

    // Start from the full block and halve exactly one dimension, chosen by the block size.
    ADDR_EXTENT3D tailDims = blockDims;

    if (Is3dSwizzle(mode))
    {
        switch (blkSizeLog2 % 3)
        {
            case 0:
                tailDims.height >>= 1;
                break;
            case 1:
                tailDims.width >>= 1;
                break;
            default:
                tailDims.depth >>= 1;
                break;
        }
    }
    else if ((blkSizeLog2 & 1) == 0)
    {
        // Even log2 block size: halve the width.
        tailDims.width >>= 1;
    }
    else
    {
        // Odd log2 block size: halve the height.
        tailDims.height >>= 1;
    }

    return tailDims;
}
1720
1721
1722 /**
1723 ************************************************************************************************************************
* Gfx12Lib::HwlGetPossibleSwizzleModes
1725 *
1726 * @brief
1727 * GFX12 specific implementation of Addr3GetPossibleSwizzleModes
1728 *
1729 * @return
1730 * ADDR_E_RETURNCODE
1731 ************************************************************************************************************************
1732 */
HwlGetPossibleSwizzleModes(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn,ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT * pOut) const1733 ADDR_E_RETURNCODE Gfx12Lib::HwlGetPossibleSwizzleModes(
1734 const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn, ///< [in] input structure
1735 ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT* pOut ///< [out] output structure
1736 ) const
1737 {
1738 ADDR_E_RETURNCODE returnCode = ADDR_OK;
1739
1740 const ADDR3_SURFACE_FLAGS flags = pIn->flags;
1741
1742 if (pIn->bpp == 96)
1743 {
1744 pOut->validModes.swLinear = 1;
1745 }
1746 // Depth/Stencil images can't be linear and must be 2D swizzle modes.
1747 // These three are related to DB block that supports only SW_64KB_2D and SW_256KB_2D for DSV.
1748 else if (flags.depth || flags.stencil)
1749 {
1750 pOut->validModes.sw2d64kB = 1;
1751 pOut->validModes.sw2d256kB = 1;
1752 }
1753 // The organization of elements in the hierarchical surface is the same as any other surface, and it can support
1754 // any 2D swizzle mode (SW_256_2D, SW_4KB_2D, SW_64KB_2D, or SW_256KB_2D). The swizzle mode can be selected
1755 // orthogonally to the underlying z or stencil surface.
1756 else if (pIn->flags.hiZHiS)
1757 {
1758 pOut->validModes.sw2d256B = 1;
1759 pOut->validModes.sw2d4kB = 1;
1760 pOut->validModes.sw2d64kB = 1;
1761 pOut->validModes.sw2d256kB = 1;
1762 }
1763 // MSAA can't be linear and must be 2D swizzle modes.
1764 else if (pIn->numSamples > 1)
1765 {
1766 pOut->validModes.sw2d256B = 1;
1767 pOut->validModes.sw2d4kB = 1;
1768 pOut->validModes.sw2d64kB = 1;
1769 pOut->validModes.sw2d256kB = 1;
1770 }
1771 // Block-compressed images need to be either using 2D or linear swizzle modes.
1772 else if (flags.blockCompressed)
1773 {
1774 pOut->validModes.swLinear = 1;
1775
1776 // We find cases where Tex3d BlockCompressed image adopts 2D_256B should be prohibited.
1777 if (IsTex3d(pIn->resourceType) == FALSE)
1778 {
1779 pOut->validModes.sw2d256B = 1;
1780 }
1781 pOut->validModes.sw2d4kB = 1;
1782 pOut->validModes.sw2d64kB = 1;
1783 pOut->validModes.sw2d256kB = 1;
1784 }
1785 else if (IsTex1d(pIn->resourceType))
1786 {
1787 pOut->validModes.swLinear = 1;
1788 pOut->validModes.sw2d256B = 1;
1789 pOut->validModes.sw2d4kB = 1;
1790 pOut->validModes.sw2d64kB = 1;
1791 pOut->validModes.sw2d256kB = 1;
1792 }
1793 else if (flags.nv12 || flags.p010 || IsTex2d(pIn->resourceType) || flags.view3dAs2dArray)
1794 {
1795 // NV12 and P010 support
1796 // SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D
1797 // There could be more multimedia formats that require more hw specific tiling modes...
1798
1799 // The exception is VRS images.
1800 // Linear is not allowed for VRS images.
1801 if (flags.isVrsImage == 0)
1802 {
1803 pOut->validModes.swLinear = 1;
1804 }
1805 if (flags.view3dAs2dArray == 0)
1806 {
1807 // ADDR3_256B_2D can't support 3D images.
1808 pOut->validModes.sw2d256B = 1;
1809 }
1810 pOut->validModes.sw2d4kB = 1;
1811 pOut->validModes.sw2d64kB = 1;
1812 pOut->validModes.sw2d256kB = 1;
1813 }
1814 else if (IsTex3d(pIn->resourceType))
1815 {
1816 // An eventual determination would be based on pal setting of height_watermark and depth_watermark.
1817 // However, we just adopt the simpler logic currently.
1818 // For 3D images w/ view3dAs2dArray = 0, SW_3D is preferred.
1819 // For 3D images w/ view3dAs2dArray = 1, it should go to 2D path above.
1820 // Enable linear since client may force linear tiling for 3D texture that does not set view3dAs2dArray.
1821 pOut->validModes.swLinear = 1;
1822 pOut->validModes.sw3d4kB = 1;
1823 pOut->validModes.sw3d64kB = 1;
1824 pOut->validModes.sw3d256kB = 1;
1825 }
1826
1827 // If client specifies a max alignment, remove swizzles that require alignment beyond it.
1828 if (pIn->maxAlign != 0)
1829 {
1830 if (pIn->maxAlign < Size256K)
1831 {
1832 pOut->validModes.value &= ~Blk256KBSwModeMask;
1833 }
1834
1835 if (pIn->maxAlign < Size64K)
1836 {
1837 pOut->validModes.value &= ~Blk64KBSwModeMask;
1838 }
1839
1840 if (pIn->maxAlign < Size4K)
1841 {
1842 pOut->validModes.value &= ~Blk4KBSwModeMask;
1843 }
1844
1845 if (pIn->maxAlign < Size256)
1846 {
1847 pOut->validModes.value &= ~Blk256BSwModeMask;
1848 }
1849 }
1850
1851 return returnCode;
1852 }
1853
1854 /**
1855 ************************************************************************************************************************
1856 * Gfx12Lib::HwlComputeStereoInfo
1857 *
1858 * @brief
1859 * Compute height alignment and right eye pipeBankXor for stereo surface
1860 *
1861 * @return
1862 * Error code
1863 *
1864 ************************************************************************************************************************
1865 */
HwlComputeStereoInfo(const ADDR3_COMPUTE_SURFACE_INFO_INPUT * pIn,UINT_32 * pAlignY,UINT_32 * pRightXor) const1866 ADDR_E_RETURNCODE Gfx12Lib::HwlComputeStereoInfo(
1867 const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info
1868 UINT_32* pAlignY, ///< Stereo requested additional alignment in Y
1869 UINT_32* pRightXor ///< Right eye xor
1870 ) const
1871 {
1872 ADDR_E_RETURNCODE ret = ADDR_OK;
1873
1874 *pRightXor = 0;
1875
1876 const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
1877 const UINT_32 samplesLog2 = Log2(pIn->numSamples);
1878 const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, samplesLog2, elemLog2);
1879
1880 if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
1881 {
1882 const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
1883
1884 UINT_32 yMax = 0;
1885 UINT_32 yPosMask = 0;
1886
1887 // First get "max y bit"
1888 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
1889 {
1890 ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);
1891
1892 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
1893 (m_equationTable[eqIndex].addr[i].index > yMax))
1894 {
1895 yMax = m_equationTable[eqIndex].addr[i].index;
1896 }
1897 }
1898
1899 // Then loop again for populating a position mask of "max Y bit"
1900 for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)
1901 {
1902 if ((m_equationTable[eqIndex].addr[i].channel == 1) &&
1903 (m_equationTable[eqIndex].addr[i].index == yMax))
1904 {
1905 yPosMask |= 1u << i;
1906 }
1907 }
1908
1909 const UINT_32 additionalAlign = 1 << yMax;
1910
1911 if (additionalAlign >= *pAlignY)
1912 {
1913 *pAlignY = additionalAlign;
1914
1915 const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);
1916
1917 if ((alignedHeight >> yMax) & 1)
1918 {
1919 *pRightXor = yPosMask >> m_pipeInterleaveLog2;
1920 }
1921 }
1922 }
1923 else
1924 {
1925 ret = ADDR_INVALIDPARAMS;
1926 }
1927
1928 return ret;
1929 }
1930
1931 /**
1932 ************************************************************************************************************************
1933 * Gfx12Lib::HwlValidateNonSwModeParams
1934 *
1935 * @brief
1936 * Validate compute surface info params except swizzle mode
1937 *
1938 * @return
1939 * TRUE if parameters are valid, FALSE otherwise
1940 ************************************************************************************************************************
1941 */
HwlValidateNonSwModeParams(const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT * pIn) const1942 BOOL_32 Gfx12Lib::HwlValidateNonSwModeParams(
1943 const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn
1944 ) const
1945 {
1946 const ADDR3_SURFACE_FLAGS flags = pIn->flags;
1947 const AddrResourceType rsrcType = pIn->resourceType;
1948 const BOOL_32 isVrs = flags.isVrsImage;
1949 const BOOL_32 isStereo = flags.qbStereo;
1950 const BOOL_32 isDisplay = flags.display;
1951 const BOOL_32 isMipmap = (pIn->numMipLevels > 1);
1952 const BOOL_32 isMsaa = (pIn->numSamples > 1);
1953 const UINT_32 bpp = pIn->bpp;
1954
1955 BOOL_32 valid = TRUE;
1956 if ((bpp == 0) || (bpp > 128) || (pIn->width == 0) || (pIn->numSamples > 8))
1957 {
1958 ADDR_ASSERT_ALWAYS();
1959 valid = FALSE;
1960 }
1961
1962 // Resource type check
1963 if (IsTex1d(rsrcType))
1964 {
1965 if (isMsaa || isStereo || isVrs || isDisplay)
1966 {
1967 ADDR_ASSERT_ALWAYS();
1968 valid = FALSE;
1969 }
1970 }
1971 else if (IsTex2d(rsrcType))
1972 {
1973 if ((isMsaa && isMipmap) || (isStereo && isMsaa) || (isStereo && isMipmap) ||
1974 // VRS surface needs to be 8BPP format
1975 (isVrs && (bpp != 8)))
1976 {
1977 ADDR_ASSERT_ALWAYS();
1978 valid = FALSE;
1979 }
1980 }
1981 else if (IsTex3d(rsrcType))
1982 {
1983 if (isMsaa || isStereo || isVrs || isDisplay)
1984 {
1985 ADDR_ASSERT_ALWAYS();
1986 valid = FALSE;
1987 }
1988 }
1989 else
1990 {
1991 // An invalid resource type that is not 1D, 2D or 3D.
1992 ADDR_ASSERT_ALWAYS();
1993 valid = FALSE;
1994 }
1995
1996 return valid;
1997 }
1998
1999 } // V3
2000 } // Addr
2001