1 /*
2 * Copyright (c) 2018-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file vphal_render_16alignment.cpp
24 //! \brief Surface alignment as 16 bytes
25 //! \details Unified VP HAL Surface 16 bytes alignment module interfaces
26 //!
27 #include "vphal_render_16alignment.h"
28 #include "vphal_debug.h"
29 #include "vpkrnheader.h"
30 #include "vphal_render_composite.h"
31 #include "vphal_render_ief.h"
32 #include "vphal_renderer.h"
33
// Sampler index written into the kernel static data (DW0.Sampler_Index).
// Both sampler flavours currently use the same index value.
#define AVS_SAMPLER_INDEX       1   // AVS sampler
#define THREED_SAMPLER_INDEX    1   // 3D sampler

// Surface indices written into the kernel static data for the source planes.
// NOTE(review): presumably these match the binding table entries programmed by
// the surface state setup - confirm against that code.
#define ALIGN16_SRC_INDEX       0   // packed source (single surface)
#define ALIGN16_SRC_Y_INDEX     0   // planar source, Y plane
#define ALIGN16_SRC_U_INDEX     1   // planar source, U plane
#define ALIGN16_SRC_UV_INDEX    1   // semi-planar source, interleaved UV plane
#define ALIGN16_SRC_V_INDEX     2   // planar source, V plane

// Surface indices written into the kernel static data for the target planes.
#define ALIGN16_TRG_INDEX       3   // packed target (single surface)
#define ALIGN16_TRG_Y_INDEX     3   // planar target, Y plane
#define ALIGN16_TRG_U_INDEX     4   // planar target, U plane
#define ALIGN16_TRG_UV_INDEX    4   // semi-planar target, interleaved UV plane
#define ALIGN16_TRG_V_INDEX     5   // planar target, V plane
47 //!
48 //! \brief 16 Bytes Alignment Kernel params for Gen9 Media Walker
49 //!
50 static const RENDERHAL_KERNEL_PARAM g_16Align_MW_KernelParam[1] =
51 {
52 /* GRF_Count
53 | BT_Count
54 | | Sampler_Count
55 | | | Thread_Count
56 | | | | GRF_Start_Register
57 | | | | | CURBE_Length
58 | | | | | | block_width
59 | | | | | | | block_height
60 | | | | | | | | blocks_x
61 | | | | | | | | | blocks_y
62 | | | | | | | | | |*/
63 { 4, 34, 1, VPHAL_USE_MEDIA_THREADS_MAX, 0, 4, 16, 16, 1, 1 } // NV12 and YUY2 and YV12
64 };
65
66 //!
67 //! \brief 16Align load the curbe data
68 //! \details Curbe data for 16Align
69 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
70 //! [in] Pointer to the 16Align State
71 //! \param PVPHAL_16_ALIGN_RENDER_DATA pRenderData
72 //! [in] Pointer to 16Align render data
73 //! \param int32_t* piCurbeOffset
74 //! [out] Pointer to curbe data offset
75 //! \return MOS_STATUS
76 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
77 //!
VpHal_16AlignLoadStaticData(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData,int32_t * piCurbeOffset)78 MOS_STATUS VpHal_16AlignLoadStaticData(
79 PVPHAL_16_ALIGN_STATE p16AlignState,
80 PVPHAL_16_ALIGN_RENDER_DATA pRenderData,
81 int32_t* piCurbeOffset)
82 {
83 PRENDERHAL_INTERFACE pRenderHal;
84 MEDIA_WALKER_16ALIGN_STATIC_DATA WalkerStatic;
85 MOS_STATUS eStatus;
86 int32_t iCurbeLength;
87 float fOffsetY, fOffsetX;
88 float fShiftX, fShiftY;
89 float fStepX, fStepY;
90
91 VPHAL_RENDER_CHK_NULL(p16AlignState);
92 VPHAL_RENDER_CHK_NULL(p16AlignState->pRenderHal);
93 eStatus = MOS_STATUS_SUCCESS;
94 pRenderHal = p16AlignState->pRenderHal;
95
96 // Set relevant static data
97 MOS_ZeroMemory(&WalkerStatic, sizeof(MEDIA_WALKER_16ALIGN_STATIC_DATA));
98 if (pRenderData->ScalingRatio_H < 0.0625f ||
99 pRenderData->ScalingRatio_V < 0.0625f)
100 {
101 WalkerStatic.DW0.Sampler_Index = THREED_SAMPLER_INDEX;
102 WalkerStatic.DW11.ScalingMode = 0;
103 fShiftX = VPHAL_HW_LINEAR_SHIFT;
104 fShiftY = VPHAL_HW_LINEAR_SHIFT;
105 }
106 else
107 {
108 WalkerStatic.DW0.Sampler_Index = AVS_SAMPLER_INDEX;
109 WalkerStatic.DW11.ScalingMode = 1;
110 fOffsetX = 0.0f;
111 fOffsetY = 0.0f;
112 fShiftX = 0.0f;
113 fShiftY = 0.0f;
114 }
115 switch (p16AlignState->pSource->Format)
116 {
117 case Format_NV12:
118 WalkerStatic.DW1.pSrcSurface_Y = ALIGN16_SRC_Y_INDEX;
119 WalkerStatic.DW2.pSrcSurface_UV = ALIGN16_SRC_UV_INDEX;
120 WalkerStatic.DW9.Input_Format = 0;
121 break;
122 case Format_YUY2:
123 WalkerStatic.DW1.pSrcSurface = ALIGN16_SRC_INDEX;
124 WalkerStatic.DW9.Input_Format = 1;
125 break;
126 case Format_YV12:
127 WalkerStatic.DW1.pSrcSurface_Y = ALIGN16_SRC_Y_INDEX;
128 WalkerStatic.DW2.pSrcSurface_U = ALIGN16_SRC_U_INDEX;
129 WalkerStatic.DW3.pSrcSurface_V = ALIGN16_SRC_V_INDEX;
130 WalkerStatic.DW9.Input_Format = 2;
131 break;
132 case Format_A8R8G8B8:
133 WalkerStatic.DW1.pSrcSurface = ALIGN16_SRC_INDEX;
134 WalkerStatic.DW9.Input_Format = 3;
135 WalkerStatic.DW16.CSC_COEFF_0 = 0;
136 WalkerStatic.DW16.CSC_COEFF_1 = 0;
137 WalkerStatic.DW17.CSC_COEFF_2 = 0;
138 WalkerStatic.DW17.CSC_COEFF_3 = 0;
139 WalkerStatic.DW18.CSC_COEFF_4 = 0;
140 WalkerStatic.DW18.CSC_COEFF_5 = 0;
141 WalkerStatic.DW19.CSC_COEFF_6 = 0;
142 WalkerStatic.DW19.CSC_COEFF_7 = 0;
143 WalkerStatic.DW20.CSC_COEFF_8 = 0;
144 WalkerStatic.DW20.CSC_COEFF_9 = 0;
145 WalkerStatic.DW21.CSC_COEFF_10 = 0;
146 WalkerStatic.DW21.CSC_COEFF_11 = 0;
147 break;
148 default:
149 VPHAL_RENDER_ASSERTMESSAGE("16 align input format doesn't support.");
150 eStatus = MOS_STATUS_INVALID_PARAMETER;
151 break;
152 }
153 #if defined(LINUX) && !defined(WDDM_LINUX)
154 WalkerStatic.DW10.Output_Pitch = p16AlignState->pTarget->OsResource.iPitch;
155 WalkerStatic.DW10.Output_Height = p16AlignState->pTarget->OsResource.iHeight;
156 #endif
157 switch (p16AlignState->pTarget->Format)
158 {
159 case Format_NV12:
160 WalkerStatic.DW4.pOutSurface_Y = ALIGN16_TRG_Y_INDEX;
161 WalkerStatic.DW5.pOutSurface_UV = ALIGN16_TRG_UV_INDEX;
162 WalkerStatic.DW9.Output_Format = 0;
163 break;
164 case Format_YUY2:
165 WalkerStatic.DW4.pOutSurface = ALIGN16_TRG_INDEX;
166 WalkerStatic.DW9.Output_Format = 1;
167 break;
168 case Format_YV12:
169 WalkerStatic.DW4.pOutSurface_Y = ALIGN16_TRG_Y_INDEX;
170 WalkerStatic.DW5.pOutSurface_U = ALIGN16_TRG_U_INDEX;
171 WalkerStatic.DW6.pOutSurface_V = ALIGN16_TRG_V_INDEX;
172 WalkerStatic.DW9.Output_Format = 2;
173 break;
174 default:
175 VPHAL_RENDER_ASSERTMESSAGE("16 align output format doesn't support.");
176 eStatus = MOS_STATUS_INVALID_PARAMETER;
177 break;
178 }
179 if (p16AlignState->pTarget->b16UsrPtr)
180 {
181 WalkerStatic.DW22.OutputMode = 0;
182 }
183 else
184 {
185 WalkerStatic.DW22.OutputMode = 1;
186 }
187 fStepX = ((p16AlignState->pSource->rcSrc.right - p16AlignState->pSource->rcSrc.left) * 1.0f) /
188 ((p16AlignState->pSource->rcDst.right - p16AlignState->pSource->rcDst.left) > 0 ?
189 (p16AlignState->pSource->rcDst.right - p16AlignState->pSource->rcDst.left) : 1);
190 fStepY = ((p16AlignState->pSource->rcSrc.bottom - p16AlignState->pSource->rcSrc.top) * 1.0f) /
191 ((p16AlignState->pSource->rcDst.bottom - p16AlignState->pSource->rcDst.top) > 0 ?
192 (p16AlignState->pSource->rcDst.bottom - p16AlignState->pSource->rcDst.top) : 1);
193 fOffsetX = (float)p16AlignState->pSource->rcSrc.left;
194 fOffsetY = (float)p16AlignState->pSource->rcSrc.top;
195 fShiftX -= p16AlignState->pSource->rcDst.left;
196 fShiftY -= p16AlignState->pSource->rcDst.top;
197 WalkerStatic.DW12.Original_X = (fOffsetX + fShiftX * fStepX) / pRenderData->dwSurfStateWd;
198 WalkerStatic.DW13.Original_Y = (fOffsetY + fShiftY * fStepY) / pRenderData->dwSurfStateHt;
199 WalkerStatic.DW22.Output_Top = p16AlignState->pSource->rcDst.top;
200 WalkerStatic.DW23.Output_Bottom = p16AlignState->pSource->rcDst.bottom - 1;
201 WalkerStatic.DW23.Output_Left = p16AlignState->pSource->rcDst.left;
202 WalkerStatic.DW24.Output_Right = p16AlignState->pSource->rcDst.right - 1;
203 WalkerStatic.DW24.bClearFlag = 0; // do not clear outside region of crop area.
204
205 WalkerStatic.DW7.ScalingStep_H = fStepX / pRenderData->dwSurfStateWd;
206 WalkerStatic.DW8.ScalingStep_V = fStepY / pRenderData->dwSurfStateHt;
207
208 iCurbeLength = sizeof(MEDIA_WALKER_16ALIGN_STATIC_DATA);
209
210 *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
211 pRenderHal,
212 pRenderData->pMediaState,
213 &WalkerStatic,
214 iCurbeLength);
215
216 if (*piCurbeOffset < 0)
217 {
218 eStatus = MOS_STATUS_UNKNOWN;
219 goto finish;
220 }
221
222 finish:
223 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
224 return eStatus;
225 }
226
227 //!
228 //! \brief 16Align kernel setup
229 //! \details Kernel setup for bitcopy
230 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
231 //! [in] Pointer to the 16Align State
232 //! \param PVPHAL_16_ALIGN_RENDER_DATA pRenderData
233 //! [in] Pointer to 16Align render data
234 //! \return MOS_STATUS
235 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
236 //!
VpHal_16AlignSetupKernel(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)237 MOS_STATUS VpHal_16AlignSetupKernel(
238 PVPHAL_16_ALIGN_STATE p16AlignState,
239 PVPHAL_16_ALIGN_RENDER_DATA pRenderData)
240 {
241 MOS_STATUS eStatus;
242 Kdll_CacheEntry *pCacheEntryTable;
243
244 VPHAL_RENDER_CHK_NULL(p16AlignState);
245 eStatus = MOS_STATUS_SUCCESS;
246 pCacheEntryTable =
247 p16AlignState->pKernelDllState->ComponentKernelCache.pCacheEntries;
248
249 // Set the Kernel Parameters
250 pRenderData->pKernelParam = p16AlignState->pKernelParamTable;
251 pRenderData->PerfTag = VPHAL_NONE;
252
253 // Set curbe & inline data size
254 pRenderData->iCurbeLength = pRenderData->pKernelParam->CURBE_Length * GRF_SIZE;
255
256 // Set Kernel entry
257 pRenderData->KernelEntry.iKUID = IDR_VP_1_1_16aligned;
258 pRenderData->KernelEntry.iKCID = -1;
259 pRenderData->KernelEntry.iSize = pCacheEntryTable[IDR_VP_1_1_16aligned].iSize;
260 pRenderData->KernelEntry.pBinary = pCacheEntryTable[IDR_VP_1_1_16aligned].pBinary;
261
262 finish:
263 return eStatus;
264 }
265
266 //!
267 //! \brief Recalculate Sampler Avs 8x8 Horizontal/Vertical scaling table
268 //! \details Recalculate Sampler Avs 8x8 Horizontal/Vertical scaling table
269 //! \param MOS_FORMAT SrcFormat
270 //! [in] Source Format
271 //! \param float fScale
272 //! [in] Horizontal or Vertical Scale Factor
273 //! \param bool bVertical
274 //! [in] true if Vertical Scaling, else Horizontal Scaling
275 //! \param uint32_t dwChromaSiting
276 //! [in] Chroma Siting
277 //! \param bool bBalancedFilter
278 //! [in] true if Gen9+, balanced filter
279 //! \param bool b8TapAdaptiveEnable
280 //! [in] true if 8Tap Adaptive Enable
281 //! \param PVPHAL_AVS_PARAMS pAvsParams
282 //! [in/out] Pointer to AVS Params
283 //! \return MOS_STATUS
284 //!
VpHal_16AlignSamplerAvsCalcScalingTable(MOS_FORMAT SrcFormat,float fScale,bool bVertical,uint32_t dwChromaSiting,bool bBalancedFilter,bool b8TapAdaptiveEnable,PMHW_AVS_PARAMS pAvsParams)285 static MOS_STATUS VpHal_16AlignSamplerAvsCalcScalingTable(
286 MOS_FORMAT SrcFormat,
287 float fScale,
288 bool bVertical,
289 uint32_t dwChromaSiting,
290 bool bBalancedFilter,
291 bool b8TapAdaptiveEnable,
292 PMHW_AVS_PARAMS pAvsParams)
293 {
294 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
295 MHW_PLANE Plane;
296 int32_t iUvPhaseOffset;
297 uint32_t dwHwPhrase;
298 uint32_t YCoefTableSize;
299 uint32_t UVCoefTableSize;
300 float fScaleParam;
301 int32_t* piYCoefsParam;
302 int32_t* piUVCoefsParam;
303 float fHPStrength;
304
305 VPHAL_RENDER_CHK_NULL(pAvsParams);
306 VPHAL_RENDER_CHK_NULL(pAvsParams->piYCoefsY);
307 VPHAL_RENDER_CHK_NULL(pAvsParams->piYCoefsX);
308 VPHAL_RENDER_CHK_NULL(pAvsParams->piUVCoefsY);
309 VPHAL_RENDER_CHK_NULL(pAvsParams->piUVCoefsX);
310
311 if (bBalancedFilter)
312 {
313 YCoefTableSize = POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G9;
314 UVCoefTableSize = POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G9;
315 dwHwPhrase = NUM_HW_POLYPHASE_TABLES_G9;
316 }
317 else
318 {
319 YCoefTableSize = POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G8;
320 UVCoefTableSize = POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G8;
321 dwHwPhrase = MHW_NUM_HW_POLYPHASE_TABLES;
322 }
323
324 fHPStrength = 0.0F;
325 piYCoefsParam = bVertical ? pAvsParams->piYCoefsY : pAvsParams->piYCoefsX;
326 piUVCoefsParam = bVertical ? pAvsParams->piUVCoefsY : pAvsParams->piUVCoefsX;
327 fScaleParam = bVertical ? pAvsParams->fScaleY : pAvsParams->fScaleX;
328
329 // Recalculate Horizontal or Vertical scaling table
330 if (SrcFormat != pAvsParams->Format || fScale != fScaleParam)
331 {
332 MOS_ZeroMemory(piYCoefsParam, YCoefTableSize);
333 MOS_ZeroMemory(piUVCoefsParam, UVCoefTableSize);
334
335 // 4-tap filtering for RGB format G-channel if 8tap adaptive filter is not enabled.
336 Plane = (IS_RGB32_FORMAT(SrcFormat) && !b8TapAdaptiveEnable) ? MHW_U_PLANE : MHW_Y_PLANE;
337 if (bVertical)
338 {
339 pAvsParams->fScaleY = fScale;
340 }
341 else
342 {
343 pAvsParams->fScaleX = fScale;
344 }
345
346 // For 1x scaling in horizontal direction, use special coefficients for filtering
347 // we don't do this when bForcePolyPhaseCoefs flag is set
348 if (fScale == 1.0F && !pAvsParams->bForcePolyPhaseCoefs)
349 {
350 VPHAL_RENDER_CHK_STATUS(Mhw_SetNearestModeTable(
351 piYCoefsParam,
352 Plane,
353 bBalancedFilter));
354 // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
355 // So, coefficient for UV/RB channels caculation can be passed
356 if (!b8TapAdaptiveEnable)
357 {
358 VPHAL_RENDER_CHK_STATUS(Mhw_SetNearestModeTable(
359 piUVCoefsParam,
360 MHW_U_PLANE,
361 bBalancedFilter));
362 }
363 }
364 else
365 {
366 // Clamp the Scaling Factor if > 1.0x
367 fScale = MOS_MIN(1.0F, fScale);
368
369 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesY(
370 piYCoefsParam,
371 fScale,
372 Plane,
373 SrcFormat,
374 fHPStrength,
375 true,
376 dwHwPhrase,
377 0));
378
379 // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
380 // So, coefficient for UV/RB channels caculation can be passed
381 if (!b8TapAdaptiveEnable)
382 {
383 if (!bBalancedFilter)
384 {
385 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesY(
386 piUVCoefsParam,
387 fScale,
388 MHW_U_PLANE,
389 SrcFormat,
390 fHPStrength,
391 true,
392 dwHwPhrase,
393 0));
394 }
395 else
396 {
397 // If Chroma Siting info is present
398 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_TOP : MHW_CHROMA_SITING_HORZ_LEFT))
399 {
400 // No Chroma Siting
401 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesUV(
402 piUVCoefsParam,
403 2.0F,
404 fScale));
405 }
406 else
407 {
408 // Chroma siting offset needs to be added
409 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_CENTER : MHW_CHROMA_SITING_HORZ_CENTER))
410 {
411 iUvPhaseOffset = MOS_UF_ROUND(0.5F * 16.0F); // U0.4
412 }
413 else //if (ChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_BOTTOM : MHW_CHROMA_SITING_HORZ_RIGHT))
414 {
415 iUvPhaseOffset = MOS_UF_ROUND(1.0F * 16.0F); // U0.4
416 }
417
418 VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesUVOffset(
419 piUVCoefsParam,
420 3.0F,
421 fScale,
422 iUvPhaseOffset));
423 }
424 }
425 }
426 }
427 }
428
429 finish:
430 return eStatus;
431 }
432
433 //!
434 //! \brief Set Sampler Avs 8x8 Table for LGCA
435 //! \details Set Sampler Avs 8x8 Table for LGCA
436 //! \param PRENDERHAL_INTERFACE pRenderHal
437 //! [in] Pointer to RenderHal Interface Structure
438 //! \param PMHW_SAMPLER_STATE_PARAM pSamplerStateParams
439 //! [in] Pointer to Sampler State Params
440 //! \param PMHW_AVS_PARAMS pAvsParams
441 //! [in/out] Pointer to AVS Params
442 //! \param MOS_FORMAT SrcFormat
443 //! [in] Source Format
444 //! \return MOS_STATUS
445 //!
VpHal_16AlignSetSamplerAvsTableParam(PRENDERHAL_INTERFACE pRenderHal,PMHW_SAMPLER_STATE_PARAM pSamplerStateParams,PMHW_AVS_PARAMS pAvsParams,MOS_FORMAT SrcFormat,float fScaleX,float fScaleY,uint32_t dwChromaSiting)446 static MOS_STATUS VpHal_16AlignSetSamplerAvsTableParam(
447 PRENDERHAL_INTERFACE pRenderHal,
448 PMHW_SAMPLER_STATE_PARAM pSamplerStateParams,
449 PMHW_AVS_PARAMS pAvsParams,
450 MOS_FORMAT SrcFormat,
451 float fScaleX,
452 float fScaleY,
453 uint32_t dwChromaSiting)
454 {
455 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
456 bool bBalancedFilter;
457 PMHW_SAMPLER_AVS_TABLE_PARAM pMhwSamplerAvsTableParam;
458
459 VPHAL_RENDER_CHK_NULL(pRenderHal);
460 VPHAL_RENDER_CHK_NULL(pSamplerStateParams);
461 VPHAL_RENDER_CHK_NULL(pAvsParams);
462 if (pAvsParams->piUVCoefsX == nullptr || pAvsParams->piYCoefsY == nullptr ||
463 pAvsParams->piYCoefsX == nullptr || pAvsParams->piUVCoefsY == nullptr ||
464 pAvsParams == nullptr)
465 {
466 VPHAL_RENDER_ASSERTMESSAGE("meet null ptr!");
467 }
468
469 pMhwSamplerAvsTableParam = pSamplerStateParams->Avs.pMhwSamplerAvsTableParam;
470
471 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable = pSamplerStateParams->Avs.b8TapAdaptiveEnable;
472 pMhwSamplerAvsTableParam->byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
473 pMhwSamplerAvsTableParam->byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
474 pMhwSamplerAvsTableParam->byteMaxDerivative8Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
475 pMhwSamplerAvsTableParam->byteMaxDerivative4Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
476 pMhwSamplerAvsTableParam->byteDefaultSharpnessLevel = MEDIASTATE_AVS_SHARPNESS_LEVEL_SHARP;
477
478 // Enable Adaptive Filtering, if it is being upscaled
479 // in either direction. we must check for this before clamping the SF.
480 if ((IS_YUV_FORMAT(SrcFormat) && (fScaleX > 1.0F || fScaleY > 1.0F)) ||
481 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable)
482 {
483 pMhwSamplerAvsTableParam->bBypassXAdaptiveFiltering = false;
484 pMhwSamplerAvsTableParam->bBypassYAdaptiveFiltering = false;
485 if (pMhwSamplerAvsTableParam->b8TapAdaptiveEnable)
486 {
487 pMhwSamplerAvsTableParam->bAdaptiveFilterAllChannels = true;
488
489 if (IS_RGB_FORMAT(SrcFormat))
490 {
491 pMhwSamplerAvsTableParam->bEnableRGBAdaptive = true;
492 }
493 }
494 }
495 else
496 {
497 pMhwSamplerAvsTableParam->bBypassXAdaptiveFiltering = true;
498 pMhwSamplerAvsTableParam->bBypassYAdaptiveFiltering = true;
499 }
500
501 // No changes to AVS parameters -> skip
502 if (SrcFormat == pAvsParams->Format &&
503 fScaleX == pAvsParams->fScaleX &&
504 fScaleY == pAvsParams->fScaleY)
505 {
506 goto finish;
507 }
508
509 // AVS Coefficients don't change for Scaling Factors > 1.0x
510 // Hence recalculation is avoided
511 if (fScaleX > 1.0F && pAvsParams->fScaleX > 1.0F)
512 {
513 pAvsParams->fScaleX = fScaleX;
514 }
515
516 // AVS Coefficients don't change for Scaling Factors > 1.0x
517 // Hence recalculation is avoided
518 if (fScaleY > 1.0F && pAvsParams->fScaleY > 1.0F)
519 {
520 pAvsParams->fScaleY = fScaleY;
521 }
522
523 bBalancedFilter = true;
524 // Recalculate Horizontal scaling table
525 VPHAL_HW_CHK_STATUS(VpHal_16AlignSamplerAvsCalcScalingTable(
526 SrcFormat,
527 fScaleX,
528 false,
529 dwChromaSiting,
530 bBalancedFilter,
531 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable ? true : false,
532 pAvsParams));
533
534 // Recalculate Vertical scaling table
535 VPHAL_HW_CHK_STATUS(VpHal_16AlignSamplerAvsCalcScalingTable(
536 SrcFormat,
537 fScaleY,
538 true,
539 dwChromaSiting,
540 bBalancedFilter,
541 pMhwSamplerAvsTableParam->b8TapAdaptiveEnable ? true : false,
542 pAvsParams));
543
544 pMhwSamplerAvsTableParam->bIsCoeffExtraEnabled = true;
545 // Save format used to calculate AVS parameters
546 pAvsParams->Format = SrcFormat;
547 pMhwSamplerAvsTableParam->b4TapGY = (IS_RGB32_FORMAT(SrcFormat) && !pMhwSamplerAvsTableParam->b8TapAdaptiveEnable);
548 pMhwSamplerAvsTableParam->b4TapRBUV = (!pMhwSamplerAvsTableParam->b8TapAdaptiveEnable);
549
550 VpHal_RenderCommonSetAVSTableParam(pAvsParams, pMhwSamplerAvsTableParam);
551
552 finish:
553 return eStatus;
554 }
555
556 //!
557 //! \brief 16Align setup HW states
558 //! \details Setup HW states for 16Align
559 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
560 //! [in] Pointer to the 16Align State
561 //! \param PVPHAL_16_ALIGN_RENDER_DATA pRenderData
562 //! [in/out] Pointer to 16Align render data
563 //! \return MOS_STATUS
564 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
565 //!
VpHal_16AlignSetSamplerStates(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)566 MOS_STATUS VpHal_16AlignSetSamplerStates(
567 PVPHAL_16_ALIGN_STATE p16AlignState,
568 PVPHAL_16_ALIGN_RENDER_DATA pRenderData)
569 {
570 MOS_STATUS eStatus;
571 PRENDERHAL_INTERFACE pRenderHal;
572 PMHW_SAMPLER_STATE_PARAM pSamplerStateParams;
573
574 VPHAL_PUBLIC_CHK_NULL(p16AlignState);
575 VPHAL_PUBLIC_CHK_NULL(pRenderData);
576
577 pRenderHal = p16AlignState->pRenderHal;
578
579 VPHAL_PUBLIC_CHK_NULL(pRenderHal);
580 pSamplerStateParams = &pRenderData->SamplerStateParams;
581 pSamplerStateParams->bInUse = true;
582
583 if (pRenderData->ScalingRatio_H < 0.0625f ||
584 pRenderData->ScalingRatio_V < 0.0625f)
585 {
586 p16AlignState->pSource->bUseSampleUnorm = true;
587 pSamplerStateParams->SamplerType = MHW_SAMPLER_TYPE_3D;
588 pSamplerStateParams->Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_BILINEAR;
589 pSamplerStateParams->Unorm.AddressU = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
590 pSamplerStateParams->Unorm.AddressV = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
591 pSamplerStateParams->Unorm.AddressW = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
592 }
593 else
594 {
595 pSamplerStateParams->SamplerType = MHW_SAMPLER_TYPE_AVS;
596 pSamplerStateParams->Avs.AvsType = false;
597 pSamplerStateParams->Avs.bEnableIEF = false;
598 pSamplerStateParams->Avs.b8TapAdaptiveEnable = false;
599 pSamplerStateParams->Avs.bHdcDwEnable = false;
600 pSamplerStateParams->Avs.bEnableAVS = true;
601 pSamplerStateParams->Avs.WeakEdgeThr = DETAIL_WEAK_EDGE_THRESHOLD;
602 pSamplerStateParams->Avs.StrongEdgeThr = DETAIL_STRONG_EDGE_THRESHOLD;
603 pSamplerStateParams->Avs.StrongEdgeWght = DETAIL_STRONG_EDGE_WEIGHT;
604 pSamplerStateParams->Avs.RegularWght = DETAIL_REGULAR_EDGE_WEIGHT;
605 pSamplerStateParams->Avs.NonEdgeWght = DETAIL_NON_EDGE_WEIGHT;
606 pSamplerStateParams->Avs.pMhwSamplerAvsTableParam = &p16AlignState->mhwSamplerAvsTableParam;
607
608 VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetSamplerAvsTableParam(
609 pRenderHal,
610 pSamplerStateParams,
611 pRenderData->pAVSParameters,
612 p16AlignState->pSource->Format,
613 pRenderData->ScalingRatio_H,
614 pRenderData->ScalingRatio_V,
615 MHW_CHROMA_SITING_HORZ_LEFT | MHW_CHROMA_SITING_VERT_TOP));
616 }
617
618
619 eStatus = pRenderHal->pfnSetSamplerStates(
620 pRenderHal,
621 pRenderData->iMediaID,
622 pSamplerStateParams,
623 1);
624
625 finish:
626 return eStatus;
627 }
628
629 //!
630 //! \brief 16Align setup HW states
631 //! \details Setup HW states for 16Align
632 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
633 //! [in] Pointer to the 16Align State
634 //! \param PVPHAL_16_ALIGN_RENDER_DATA pRenderData
635 //! [in/out] Pointer to 16Align render data
636 //! \return MOS_STATUS
637 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
638 //!
VpHal_16AlignSetupHwStates(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)639 MOS_STATUS VpHal_16AlignSetupHwStates(
640 PVPHAL_16_ALIGN_STATE p16AlignState,
641 PVPHAL_16_ALIGN_RENDER_DATA pRenderData)
642 {
643 PRENDERHAL_INTERFACE pRenderHal;
644 int32_t iKrnAllocation;
645 int32_t iCurbeOffset;
646 MOS_STATUS eStatus;
647 int32_t iThreadCount;
648 MHW_KERNEL_PARAM MhwKernelParam;
649
650 VPHAL_RENDER_CHK_NULL(p16AlignState);
651 VPHAL_RENDER_CHK_NULL(pRenderData);
652
653 eStatus = MOS_STATUS_SUCCESS;
654 pRenderHal = p16AlignState->pRenderHal;
655 VPHAL_RENDER_CHK_NULL(pRenderHal);
656
657 // Allocate and reset media state
658 pRenderData->pMediaState = pRenderHal->pfnAssignMediaState(pRenderHal, (RENDERHAL_COMPONENT)RENDERHAL_COMPONENT_16ALIGN);
659 VPHAL_RENDER_CHK_NULL(pRenderData->pMediaState);
660
661 // Allocate and reset SSH instance
662 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnAssignSshInstance(pRenderHal));
663
664 // Assign and Reset Binding Table
665 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnAssignBindingTable(
666 pRenderHal,
667 &pRenderData->iBindingTable));
668
669 // Setup surface states
670 VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnSetupSurfaceStates(
671 p16AlignState,
672 pRenderData));
673
674 // load static data
675 VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnLoadStaticData(
676 p16AlignState,
677 pRenderData,
678 &iCurbeOffset));
679
680 if (p16AlignState->pPerfData->CompMaxThreads.bEnabled)
681 {
682 iThreadCount = p16AlignState->pPerfData->CompMaxThreads.uiVal;
683 }
684 else
685 {
686 iThreadCount = pRenderData->pKernelParam->Thread_Count;
687 }
688
689 // Setup VFE State params.
690 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnSetVfeStateParams(
691 pRenderHal,
692 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
693 iThreadCount,
694 pRenderData->iCurbeLength,
695 pRenderData->iInlineLength,
696 nullptr));
697
698 // Load kernel to GSH
699 INIT_MHW_KERNEL_PARAM(MhwKernelParam, &pRenderData->KernelEntry);
700 iKrnAllocation = pRenderHal->pfnLoadKernel(
701 pRenderHal,
702 pRenderData->pKernelParam,
703 &MhwKernelParam,
704 nullptr);
705
706 if (iKrnAllocation < 0)
707 {
708 eStatus = MOS_STATUS_UNKNOWN;
709 goto finish;
710 }
711
712 // Allocate Media ID, link to kernel
713 pRenderData->iMediaID = pRenderHal->pfnAllocateMediaID(
714 pRenderHal,
715 iKrnAllocation,
716 pRenderData->iBindingTable,
717 iCurbeOffset,
718 (pRenderData->pKernelParam->CURBE_Length << 5),
719 0,
720 nullptr);
721
722 if (pRenderData->iMediaID < 0)
723 {
724 eStatus = MOS_STATUS_UNKNOWN;
725 goto finish;
726 }
727
728 // Set Sampler states for this Media ID
729 VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnSetSamplerStates(
730 p16AlignState,
731 pRenderData));
732
733 finish:
734 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
735 return eStatus;
736 }
737
738 //!
739 //! \brief 16Align media walker setup
740 //! \details Media walker setup for bitcopy
741 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
742 //! [in] Pointer to the 16Align State
743 //! \param PVPHAL_16_ALIGN_RENDER_DATA pRenderData
744 //! [in] Pointer to 16Align render data
745 //! \param PMHW_WALKER_PARAMS pWalkerParams
746 //! [in/out] Pointer to Walker params
747 //! \return MOS_STATUS
748 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
749 //!
VpHal_16AlignRenderMediaWalker(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData,PMHW_WALKER_PARAMS pWalkerParams)750 MOS_STATUS VpHal_16AlignRenderMediaWalker(
751 PVPHAL_16_ALIGN_STATE p16AlignState,
752 PVPHAL_16_ALIGN_RENDER_DATA pRenderData,
753 PMHW_WALKER_PARAMS pWalkerParams)
754 {
755 PRENDERHAL_INTERFACE pRenderHal;
756 uint32_t dwWidth;
757 uint32_t dwHeight;
758 MOS_STATUS eStatus;
759
760 eStatus = MOS_STATUS_SUCCESS;
761 pRenderHal = p16AlignState->pRenderHal;
762
763 // Calculate how many media object commands are needed.
764 dwWidth = MOS_ALIGN_CEIL((p16AlignState->pTarget->rcDst.right -
765 p16AlignState->pTarget->rcDst.left),
766 pRenderData->pKernelParam->block_width);
767 dwHeight = MOS_ALIGN_CEIL((p16AlignState->pTarget->rcDst.bottom -
768 p16AlignState->pTarget->rcDst.top),
769 pRenderData->pKernelParam->block_height);
770
771 pRenderData->iBlocksX = dwWidth / pRenderData->pKernelParam->block_width;
772 pRenderData->iBlocksY = dwHeight / pRenderData->pKernelParam->block_height;
773
774 // Set walker cmd params - Rasterscan
775 MOS_ZeroMemory(pWalkerParams, sizeof(*pWalkerParams));
776
777 pWalkerParams->InterfaceDescriptorOffset = pRenderData->iMediaID;
778
779 pWalkerParams->dwGlobalLoopExecCount = 1;
780 pWalkerParams->dwLocalLoopExecCount = pRenderData->iBlocksY - 1;
781
782 pWalkerParams->GlobalResolution.x = pRenderData->iBlocksX;
783 pWalkerParams->GlobalResolution.y = pRenderData->iBlocksY;
784
785 pWalkerParams->GlobalStart.x = 0;
786 pWalkerParams->GlobalStart.y = 0;
787
788 pWalkerParams->GlobalOutlerLoopStride.x = pRenderData->iBlocksX;
789 pWalkerParams->GlobalOutlerLoopStride.y = 0;
790
791 pWalkerParams->GlobalInnerLoopUnit.x = 0;
792 pWalkerParams->GlobalInnerLoopUnit.y = pRenderData->iBlocksY;
793
794 pWalkerParams->BlockResolution.x = pRenderData->iBlocksX;
795 pWalkerParams->BlockResolution.y = pRenderData->iBlocksY;
796
797 pWalkerParams->LocalStart.x = 0;
798 pWalkerParams->LocalStart.y = 0;
799
800 pWalkerParams->LocalEnd.x = pRenderData->iBlocksX - 1;
801 pWalkerParams->LocalEnd.y = 0;
802
803 pWalkerParams->LocalOutLoopStride.x = 0;
804 pWalkerParams->LocalOutLoopStride.y = 1;
805
806 pWalkerParams->LocalInnerLoopUnit.x = 1;
807 pWalkerParams->LocalInnerLoopUnit.y = 0;
808
809 return eStatus;
810 }
811
812 //!
813 //! \brief 16Align renderer
814 //! \details Renderer function for 16Align
815 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
816 //! [in] Pointer to the 16Align State
817 //! \param PVPHAL_RENDER_PARAMS pRenderParams
818 //! [in] Pointer to 16Align render params
819 //! \return MOS_STATUS
820 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
821 //!
VpHal_16AlignRender(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_RENDER_PARAMS pRenderParams)822 MOS_STATUS VpHal_16AlignRender(
823 PVPHAL_16_ALIGN_STATE p16AlignState,
824 PVPHAL_RENDER_PARAMS pRenderParams)
825 {
826 MOS_STATUS eStatus;
827 PRENDERHAL_INTERFACE pRenderHal;
828 PMOS_INTERFACE pOsInterface;
829 MHW_WALKER_PARAMS WalkerParams;
830 VPHAL_16_ALIGN_RENDER_DATA RenderData;
831 PRENDERHAL_L3_CACHE_SETTINGS pCacheSettings = nullptr;
832 uint32_t dwInputRegionHeight;
833 uint32_t dwInputRegionWidth;
834 uint32_t dwOutputRegionHeight;
835 uint32_t dwOutputRegionWidth;
836
837 VPHAL_RENDER_ASSERT(p16AlignState);
838 VPHAL_RENDER_ASSERT(pRenderParams);
839 VPHAL_RENDER_ASSERT(p16AlignState->pOsInterface);
840 VPHAL_RENDER_ASSERT(p16AlignState->pRenderHal);
841 VPHAL_RENDER_ASSERT(p16AlignState->pPerfData);
842
843 eStatus = MOS_STATUS_SUCCESS;
844 pOsInterface = p16AlignState->pOsInterface;
845 pRenderHal = p16AlignState->pRenderHal;
846 MOS_ZeroMemory(&RenderData, sizeof(RenderData));
847
848 // Reset reporting
849 p16AlignState->Reporting.InitReportValue();
850
851 // Reset states before rendering
852 pOsInterface->pfnResetOsStates(pOsInterface);
853 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnReset(pRenderHal));
854 pOsInterface->pfnResetPerfBufferID(pOsInterface); // reset once per frame
855
856 VPHAL_DBG_STATE_DUMPPER_SET_CURRENT_STAGE(VPHAL_DBG_STAGE_COMP);
857
858 // Configure cache settings for this render operation
859 pCacheSettings = &pRenderHal->L3CacheSettings;
860 MOS_ZeroMemory(pCacheSettings, sizeof(*pCacheSettings));
861 pCacheSettings->bOverride = true;
862 pCacheSettings->bL3CachingEnabled = p16AlignState->SurfMemObjCtl.bL3CachingEnabled;
863
864 if (p16AlignState->pPerfData->L3SQCReg1Override.bEnabled)
865 {
866 pCacheSettings->bSqcReg1Override = true;
867 pCacheSettings->dwSqcReg1 = p16AlignState->pPerfData->L3SQCReg1Override.uiVal;
868 }
869
870 if (p16AlignState->pPerfData->L3CntlReg2Override.bEnabled)
871 {
872 pCacheSettings->bCntlReg2Override = true;
873 pCacheSettings->dwCntlReg2 = p16AlignState->pPerfData->L3CntlReg2Override.uiVal;
874 }
875
876 if (p16AlignState->pPerfData->L3CntlReg3Override.bEnabled)
877 {
878 pCacheSettings->bCntlReg3Override = true;
879 pCacheSettings->dwCntlReg3 = p16AlignState->pPerfData->L3CntlReg3Override.uiVal;
880 }
881
882 if (p16AlignState->pPerfData->L3LRA1RegOverride.bEnabled)
883 {
884 pCacheSettings->bLra1RegOverride = true;
885 pCacheSettings->dwLra1Reg = p16AlignState->pPerfData->L3LRA1RegOverride.uiVal;
886 }
887
888 // Setup Source/Target surface and get the Source width/height for
889 p16AlignState->pSource = pRenderParams->pSrc[0];
890 p16AlignState->pTarget = pRenderParams->pTarget[0];
891 dwInputRegionWidth = p16AlignState->pSource->rcSrc.right - p16AlignState->pSource->rcSrc.left;
892 dwInputRegionHeight = p16AlignState->pSource->rcSrc.bottom - p16AlignState->pSource->rcSrc.top;
893 dwOutputRegionWidth = p16AlignState->pSource->rcDst.right - p16AlignState->pSource->rcDst.left;
894 dwOutputRegionHeight = p16AlignState->pSource->rcDst.bottom - p16AlignState->pSource->rcDst.top;
895
896 RenderData.ScalingRatio_H = (float)dwOutputRegionWidth / (float)dwInputRegionWidth;
897 RenderData.ScalingRatio_V = (float)dwOutputRegionHeight / (float)dwInputRegionHeight;
898
899 RenderData.pAVSParameters = &p16AlignState->AVSParameters;
900 RenderData.SamplerStateParams.Avs.pMhwSamplerAvsTableParam = &RenderData.mhwSamplerAvsTableParam;
901
902 p16AlignState->pKernelParamTable = (PRENDERHAL_KERNEL_PARAM)&g_16Align_MW_KernelParam[0];
903
904 // Ensure input can be read
905 pOsInterface->pfnSyncOnResource(
906 pOsInterface,
907 &p16AlignState->pSource->OsResource,
908 pOsInterface->CurrentGpuContextOrdinal,
909 false);
910
911 // Ensure the output can be written
912 pOsInterface->pfnSyncOnResource(
913 pOsInterface,
914 &p16AlignState->pTarget->OsResource,
915 pOsInterface->CurrentGpuContextOrdinal,
916 true);
917
918 // Setup copy kernel
919 VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnSetupKernel(
920 p16AlignState,
921 &RenderData));
922
923 // Submit HW States and Commands
924 VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetupHwStates(
925 p16AlignState,
926 &RenderData));
927
928 // Set perftag information
929 pOsInterface->pfnResetPerfBufferID(pOsInterface);
930 pOsInterface->pfnSetPerfTag(pOsInterface, RenderData.PerfTag);
931
932 VPHAL_RENDER_CHK_STATUS(VpHal_16AlignRenderMediaWalker(
933 p16AlignState,
934 &RenderData,
935 &WalkerParams));
936
937 VPHAL_DBG_STATE_DUMPPER_DUMP_GSH(pRenderHal);
938 VPHAL_DBG_STATE_DUMPPER_DUMP_SSH(pRenderHal);
939
940 VPHAL_RENDER_CHK_STATUS(VpHal_RndrSubmitCommands(
941 pRenderHal,
942 nullptr,
943 p16AlignState->bNullHwRender16Align,
944 &WalkerParams,
945 nullptr,
946 &p16AlignState->StatusTableUpdateParams,
947 kernelUserPtr,
948 0,
949 nullptr,
950 true));
951
952 finish:
953 MOS_ZeroMemory(pCacheSettings, sizeof(*pCacheSettings));
954 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
955 VPHAL_RENDER_NORMALMESSAGE("finished UsrPtr process!");
956 return eStatus;
957 }
958
959 //!
960 //! \brief 16Align Destroy state
961 //! \details Function to destroy 16Align state
962 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
963 //! [in] Pointer to the 16Align State
964 //! \return MOS_STATUS
965 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
966 //!
VpHal_16AlignDestroy(PVPHAL_16_ALIGN_STATE p16AlignState)967 MOS_STATUS VpHal_16AlignDestroy(
968 PVPHAL_16_ALIGN_STATE p16AlignState)
969 {
970 MOS_STATUS eStatus;
971 eStatus = MOS_STATUS_SUCCESS;
972 VPHAL_RENDER_CHK_NULL(p16AlignState);
973 VpHal_RenderDestroyAVSParams(&p16AlignState->AVSParameters);
974 MOS_UNUSED(p16AlignState);
975
976 finish:
977 return eStatus;
978 }
979
980 //!
981 //! \brief 16Align kernel state Initializations
982 //! \details Kernel state Initializations for 16Align
983 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
984 //! [in] Pointer to the 16Align State
985 //! \param const VphalSettings* pSettings
986 //! [in] Pointer to VPHAL Setting
987 //! \param Kdll_State pKernelDllState
988 //! [in/out] Pointer to bitcopy kernel Dll state
989 //! \return MOS_STATUS
990 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
991 //!
VpHal_16AlignInitialize(PVPHAL_16_ALIGN_STATE p16AlignState,const VphalSettings * pSettings,Kdll_State * pKernelDllState)992 MOS_STATUS VpHal_16AlignInitialize(
993 PVPHAL_16_ALIGN_STATE p16AlignState,
994 const VphalSettings *pSettings,
995 Kdll_State *pKernelDllState)
996 {
997 MOS_NULL_RENDERING_FLAGS NullRenderingFlags;
998
999 VPHAL_RENDER_ASSERT(p16AlignState);
1000 VPHAL_RENDER_ASSERT(p16AlignState->pOsInterface);
1001
1002 NullRenderingFlags =
1003 p16AlignState->pOsInterface->pfnGetNullHWRenderFlags(p16AlignState->pOsInterface);
1004 p16AlignState->bNullHwRender16Align =
1005 NullRenderingFlags.VPLgca ||
1006 NullRenderingFlags.VPGobal;
1007
1008 // Setup interface to KDLL
1009 p16AlignState->pKernelDllState = pKernelDllState;
1010 VpHal_RenderInitAVSParams(&p16AlignState->AVSParameters,
1011 POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G9,
1012 POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G9);
1013
1014 return MOS_STATUS_SUCCESS;
1015 }
1016
1017 //!
1018 //! \brief Set Surface for HW Access
1019 //! \details Common Function for setting up surface state, need to use this function
1020 //! if render would use CP HM
1021 //! \param [in] bSrc
1022 //! indicate the surface is input source.
1023 //! \param [in] pRenderHal
1024 //! Pointer to RenderHal Interface Structure
1025 //! \param [in] pSurface
1026 //! Pointer to Surface
1027 //! \param [in] pRenderSurface
1028 //! Pointer to Render Surface
1029 //! \param [in] pSurfaceParams
1030 //! Pointer to RenderHal Surface Params
1031 //! \param [in] PVPHAL_16_ALIGN_RENDER_DATA
1032 //! Pointer to Rendering data
1033 //! \return MOS_STATUS
1034 //! MOS_STATUS_SUCCESS if success. Error code otherwise
1035 //!
VpHal_16AlignSetupSurfaceStatesInt(bool bSrc,PRENDERHAL_INTERFACE pRenderHal,PVPHAL_SURFACE pSurface,PRENDERHAL_SURFACE pRenderSurface,PRENDERHAL_SURFACE_STATE_PARAMS pSurfaceParams,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)1036 MOS_STATUS VpHal_16AlignSetupSurfaceStatesInt(
1037 bool bSrc,
1038 PRENDERHAL_INTERFACE pRenderHal,
1039 PVPHAL_SURFACE pSurface,
1040 PRENDERHAL_SURFACE pRenderSurface,
1041 PRENDERHAL_SURFACE_STATE_PARAMS pSurfaceParams,
1042 PVPHAL_16_ALIGN_RENDER_DATA pRenderData)
1043 {
1044 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1045 PRENDERHAL_SURFACE_STATE_ENTRY pSurfaceEntry;
1046 MOS_FORMAT format = pSurface->Format;
1047 uint32_t width = pSurface->dwWidth;
1048 #if defined(LINUX) && !defined(WDDM_LINUX)
1049 uint32_t dwSize = pSurface->dwHeight * pSurface->OsResource.iPitch;
1050 #else
1051 uint32_t dwSize = pSurface->dwHeight * pSurface->dwPitch;
1052 #endif
1053
1054 if (!bSrc && pSurface->b16UsrPtr)
1055 {
1056 // system linear surface.
1057 // reset the output surface format as Raw and calculate the surface size.
1058 pSurface->Format = Format_RAW;
1059 switch (format)
1060 {
1061 case Format_NV12:
1062 for (int i = 0; i < 2; i++)
1063 {
1064 pSurface->dwWidth = (i==0)?dwSize:dwSize/2;
1065 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1066 pRenderHal,
1067 pSurface,
1068 pRenderSurface,
1069 pSurfaceParams,
1070 pRenderData->iBindingTable,
1071 ((i==0)?ALIGN16_TRG_Y_INDEX:ALIGN16_TRG_UV_INDEX),
1072 bSrc?false:true));
1073 // add UV offset which was missed in raw buffer common configuration.
1074 if (i > 0)
1075 {
1076 pSurfaceEntry = &pRenderHal->pStateHeap->pSurfaceEntry[pRenderHal->pStateHeap->iCurrentSurfaceState-1]; // fetch the surface plane
1077 pSurfaceEntry->SurfaceToken.DW2.SurfaceOffset = dwSize;
1078 }
1079 }
1080 break;
1081 case Format_YUY2:
1082 pSurface->dwWidth = dwSize * 2;
1083 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1084 pRenderHal,
1085 pSurface,
1086 pRenderSurface,
1087 pSurfaceParams,
1088 pRenderData->iBindingTable,
1089 ALIGN16_TRG_INDEX,
1090 bSrc?false:true));
1091 break;
1092 case Format_YV12:
1093 // YV12 should be allocated as 3 linear buffer for every Y U V output plane.
1094 for (int i = 0; i < 3; i++)
1095 {
1096 pSurface->dwWidth = (i == 0)?dwSize:dwSize/4;
1097 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1098 pRenderHal,
1099 pSurface,
1100 pRenderSurface,
1101 pSurfaceParams,
1102 pRenderData->iBindingTable,
1103 (i==0)?ALIGN16_TRG_Y_INDEX:((i==1)?ALIGN16_TRG_V_INDEX:ALIGN16_TRG_U_INDEX),
1104 bSrc?false:true));
1105 // add U, V offset which was missed in raw buffer common configuration.
1106 // recalculate U, V offset based on 16aligned pitch.
1107 if (i > 0)
1108 {
1109 pSurfaceEntry = &pRenderHal->pStateHeap->pSurfaceEntry[pRenderHal->pStateHeap->iCurrentSurfaceState-1]; // fetch the surface plane
1110 pSurfaceEntry->SurfaceToken.DW2.SurfaceOffset = (i == 1)?(dwSize*5/4):dwSize;
1111 }
1112 }
1113 break;
1114 default:
1115 VPHAL_RENDER_ASSERTMESSAGE("16 align output format doesn't support.");
1116 eStatus = MOS_STATUS_INVALID_PARAMETER;
1117 break;
1118 }
1119 // resotre the target format and width for curbe data.
1120 pSurface->Format = format;
1121 pSurface->dwWidth = width;
1122 }
1123 else
1124 {
1125 // input source keep using 2D surface foramt. set tile mode as linear.
1126 // VA 2D surface
1127 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
1128 pRenderHal,
1129 pSurface,
1130 pRenderSurface,
1131 pSurfaceParams,
1132 pRenderData->iBindingTable,
1133 bSrc?ALIGN16_SRC_INDEX:ALIGN16_TRG_INDEX,
1134 bSrc?false:true));
1135 // for 1 sampler access YV12 3plane input, Y plane should use the R8 sampler type, the same as U,V plane
1136 // for 3 samplers access YV12 3plane input, Y plane should use Y8 sampler type
1137 // 16-alignment kernel always uses 1-sampler, legacy FC kernel always uses 3-sampler
1138 if (pSurface->Format == Format_YV12)
1139 {
1140 uint32_t * pSrcPlaneYSampler = nullptr;
1141 pSurfaceEntry = &pRenderHal->pStateHeap->pSurfaceEntry[0]; // input Y plane
1142 pSrcPlaneYSampler = (uint32_t*)pSurfaceEntry->pSurfaceState + 2; // DW2
1143 *pSrcPlaneYSampler = (*pSrcPlaneYSampler & 0x07FFFFFF) | (0x0B<<27);
1144 if (pSurface->b16UsrPtr)
1145 {
1146 // correct the input surface index, from YVU to YUV.
1147 pSurfaceEntry = &pRenderHal->pStateHeap->pSurfaceEntry[1];
1148 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnBindSurfaceState(pRenderHal, pRenderData->iBindingTable,
1149 ALIGN16_SRC_V_INDEX, pSurfaceEntry));
1150 pSurfaceEntry = &pRenderHal->pStateHeap->pSurfaceEntry[2];
1151 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnBindSurfaceState(pRenderHal, pRenderData->iBindingTable,
1152 ALIGN16_SRC_U_INDEX, pSurfaceEntry));
1153 }
1154 }
1155 if (bSrc)
1156 {
1157 pSurfaceEntry = &pRenderHal->pStateHeap->pSurfaceEntry[0];
1158 pRenderData->dwSurfStateHt = pSurfaceEntry->dwHeight;
1159 pRenderData->dwSurfStateWd = pSurfaceEntry->dwWidth;
1160 }
1161 }
1162 finish:
1163 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1164 return eStatus;
1165 }
1166
1167 //!
1168 //! \brief 16alignment setup surface states
1169 //! \details Setup surface states for 16Align
1170 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
1171 //! [in] Pointer to the 16Align State
1172 //! \param PVPHAL_16_ALIGN_RENDER_DATA pRenderData
1173 //! [in] Pointer to 16Align render data
1174 //! \return MOS_STATUS
1175 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
1176 //!
VpHal_16AlignSetupSurfaceStates(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)1177 MOS_STATUS VpHal_16AlignSetupSurfaceStates(
1178 PVPHAL_16_ALIGN_STATE p16AlignState,
1179 PVPHAL_16_ALIGN_RENDER_DATA pRenderData)
1180 {
1181 PRENDERHAL_INTERFACE pRenderHal;
1182 RENDERHAL_SURFACE_STATE_PARAMS SurfaceParams;
1183 MOS_STATUS eStatus;
1184 PRENDERHAL_SURFACE_STATE_ENTRY pSurfaceEntry;
1185
1186 eStatus = MOS_STATUS_SUCCESS;
1187 pRenderHal = p16AlignState->pRenderHal;
1188
1189 // Source surface
1190 MOS_ZeroMemory(&SurfaceParams, sizeof(SurfaceParams));
1191
1192 if (pRenderData->ScalingRatio_H < 0.0625f ||
1193 pRenderData->ScalingRatio_V < 0.0625f)
1194 {
1195 SurfaceParams.bAVS = false;
1196 }
1197 else
1198 {
1199 SurfaceParams.bAVS = true;
1200 }
1201 SurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_SRCRECT;
1202 SurfaceParams.isOutput = false;
1203 SurfaceParams.MemObjCtl =
1204 p16AlignState->SurfMemObjCtl.SourceSurfMemObjCtl;
1205 SurfaceParams.Type = RENDERHAL_SURFACE_TYPE_ADV_G9;
1206 SurfaceParams.bWidthInDword_Y = false;
1207 SurfaceParams.bWidthInDword_UV = false;
1208 SurfaceParams.bWidth16Align = false;
1209
1210 VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetupSurfaceStatesInt(true,
1211 pRenderHal,
1212 p16AlignState->pSource,
1213 &p16AlignState->RenderHalSource,
1214 &SurfaceParams,
1215 pRenderData));
1216
1217 // Target surface
1218 SurfaceParams.MemObjCtl =
1219 p16AlignState->SurfMemObjCtl.TargetSurfMemObjCtl;
1220 SurfaceParams.Type = pRenderHal->SurfaceTypeDefault;
1221 SurfaceParams.isOutput = true;
1222 SurfaceParams.bAVS = false;
1223 SurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_DSTRECT;
1224
1225 VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetupSurfaceStatesInt(false,
1226 pRenderHal,
1227 p16AlignState->pTarget,
1228 &p16AlignState->RenderHalTarget,
1229 &SurfaceParams,
1230 pRenderData));
1231
1232 finish:
1233 VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1234 return eStatus;
1235 }
1236
1237 //!
1238 //! \brief 16Align interface Initializations
1239 //! \details Interface Initializations for 16Align
1240 //! \param PVPHAL_16_ALIGN_STATE p16AlignState
1241 //! [in] Pointer to the 16Align State
1242 //! \param PRENDERHAL_INTERFACE pRenderHal
1243 //! [in/out] Pointer to RenderHal Interface Structure
1244 //! \return MOS_STATUS
1245 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
1246 //!
VpHal_16AlignInitInterface(PVPHAL_16_ALIGN_STATE p16AlignState,PRENDERHAL_INTERFACE pRenderHal)1247 MOS_STATUS VpHal_16AlignInitInterface(
1248 PVPHAL_16_ALIGN_STATE p16AlignState,
1249 PRENDERHAL_INTERFACE pRenderHal)
1250 {
1251 PMOS_INTERFACE pOsInterface;
1252 MOS_STATUS eStatus;
1253
1254 eStatus = MOS_STATUS_SUCCESS;
1255 pOsInterface = pRenderHal->pOsInterface;
1256
1257 // Connect renderer to other VPHAL components (HW/OS interfaces)
1258 p16AlignState->pRenderHal = pRenderHal;
1259 p16AlignState->pOsInterface = pOsInterface;
1260 p16AlignState->pSkuTable = pRenderHal->pSkuTable;
1261
1262 // Setup functions
1263 p16AlignState->pfnInitialize = VpHal_16AlignInitialize;
1264 p16AlignState->pfnDestroy = VpHal_16AlignDestroy;
1265 p16AlignState->pfnRender = VpHal_16AlignRender;
1266 p16AlignState->pfnSetupSurfaceStates = VpHal_16AlignSetupSurfaceStates;
1267
1268 // States
1269 p16AlignState->bFtrMediaWalker =
1270 p16AlignState->pRenderHal->pfnGetMediaWalkerStatus(p16AlignState->pRenderHal) ? true : false;
1271
1272 p16AlignState->pfnLoadStaticData = VpHal_16AlignLoadStaticData;
1273 p16AlignState->pfnSetupKernel = VpHal_16AlignSetupKernel;
1274 p16AlignState->pfnSetSamplerStates = VpHal_16AlignSetSamplerStates;
1275
1276 return eStatus;
1277 }
1278
1279 //!
1280 //! \brief check 16 bytes alignment whether can be processed
1281 //! \details check 16 bytes alignment whether can be processed
1282 //! \param PVPHAL_RENDER_PARAMS pRenderParams
1283 //! [in] Pointer to VPHAL render parameter
1284 //! \return bool
1285 //! Return true if 16 bytes alignment can be processed, otherwise false
1286 //!
VpHal_RndrIs16Align(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_RENDER_PARAMS pRenderParams)1287 bool VpHal_RndrIs16Align(
1288 PVPHAL_16_ALIGN_STATE p16AlignState,
1289 PVPHAL_RENDER_PARAMS pRenderParams)
1290 {
1291 PVPHAL_SURFACE pSource;
1292 PVPHAL_SURFACE pTarget;
1293 bool b16alignment = false;
1294
1295 pSource = pRenderParams->pSrc[0];
1296 pTarget = pRenderParams->pTarget[0];
1297
1298 if (!GFX_IS_RENDERCORE(p16AlignState->pRenderHal->Platform, IGFX_GEN9_CORE))
1299 {
1300 VPHAL_RENDER_ASSERTMESSAGE("Invalid 16UserPtr platforms!");
1301 return false;
1302 }
1303
1304 if (pRenderParams->uSrcCount == 1 &&
1305 pRenderParams->uDstCount == 1 &&
1306 pRenderParams->pConstriction == nullptr &&
1307 (pSource->pBlendingParams == nullptr ||
1308 (pSource->pBlendingParams != nullptr &&
1309 pSource->pBlendingParams->BlendType == BLEND_NONE)) &&
1310 pSource->pLumaKeyParams == nullptr &&
1311 pSource->pProcampParams == nullptr &&
1312 pSource->pIEFParams == nullptr &&
1313 pSource->bInterlacedScaling == false &&
1314 pSource->bFieldWeaving == false &&
1315 pSource->pDenoiseParams == nullptr &&
1316 pSource->pColorPipeParams == nullptr &&
1317 !(pSource->pDeinterlaceParams &&
1318 pSource->pDeinterlaceParams->DIMode == DI_MODE_BOB))
1319 {
1320 b16alignment = ((pSource->Format == Format_NV12 ||
1321 pSource->Format == Format_YUY2 ||
1322 pSource->Format == Format_YV12) &&
1323 (pTarget->Format == Format_NV12 ||
1324 pTarget->Format == Format_YUY2 ||
1325 pTarget->Format == Format_YV12 ||
1326 pTarget->Format == Format_A8R8G8B8));
1327 if (pSource->b16UsrPtr && pSource->TileType != MOS_TILE_LINEAR)
1328 {
1329 b16alignment = false;
1330 }
1331
1332 }
1333 VPHAL_RENDER_NORMALMESSAGE("%s support(s) %s %s %s surface convert to %s %s surface",
1334 b16alignment?"16UsrPtr":"16UsrPtr doesn't",
1335 (pSource->TileType == MOS_TILE_LINEAR)?"":"non",
1336 pSource->b16UsrPtr?"16 bytes aligned linear":"2D", VphalDumperTool::GetFormatStr(pSource->Format),
1337 pTarget->b16UsrPtr?"16 bytes aligned linear":"2D", VphalDumperTool::GetFormatStr(pTarget->Format));
1338
1339 return b16alignment;
1340 }
1341