1 /*
2 * Copyright (c) 2018-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     vphal_render_16alignment.cpp
24 //! \brief    Surface alignment as 16 bytes
25 //! \details  Unified VP HAL Surface 16 bytes alignment module interfaces
26 //!
27 #include "vphal_render_16alignment.h"
28 #include "vphal_debug.h"
29 #include "vpkrnheader.h"
30 #include "vphal_render_composite.h"
31 #include "vphal_render_ief.h"
32 #include "vphal_renderer.h"
33 
// Sampler indices written to the kernel static data (DW0.Sampler_Index).
// Both are 1: only one of the two sampler types is selected per pass.
#define AVS_SAMPLER_INDEX       1
#define THREED_SAMPLER_INDEX    1 // 3D sampler

// Surface indices written to the kernel static data for the source planes.
// The packed (single-plane) index aliases the Y index, and the interleaved
// UV index aliases the U index.
#define ALIGN16_SRC_INDEX       0
#define ALIGN16_SRC_Y_INDEX     0
#define ALIGN16_SRC_U_INDEX     1
#define ALIGN16_SRC_UV_INDEX    1
#define ALIGN16_SRC_V_INDEX     2
// Surface indices for the target planes, laid out like the source indices.
#define ALIGN16_TRG_INDEX       3
#define ALIGN16_TRG_Y_INDEX     3
#define ALIGN16_TRG_U_INDEX     4
#define ALIGN16_TRG_UV_INDEX    4
#define ALIGN16_TRG_V_INDEX     5
47 //!
48 //! \brief 16 Bytes Alignment Kernel params for Gen9 Media Walker
49 //!
50 static const RENDERHAL_KERNEL_PARAM g_16Align_MW_KernelParam[1] =
51 {
52 /*    GRF_Count
53       |  BT_Count
54       |  |    Sampler_Count
55       |  |    |  Thread_Count
56       |  |    |  |                             GRF_Start_Register
57       |  |    |  |                             |   CURBE_Length
58       |  |    |  |                             |   |   block_width
59       |  |    |  |                             |   |   |    block_height
60       |  |    |  |                             |   |   |    |   blocks_x
61       |  |    |  |                             |   |   |    |   |   blocks_y
62       |  |    |  |                             |   |   |    |   |   |*/
63     { 4, 34,  1, VPHAL_USE_MEDIA_THREADS_MAX,  0,  4,  16,  16,  1,  1 }    // NV12 and YUY2 and YV12
64 };
65 
66 //!
67 //! \brief    16Align load the curbe data
68 //! \details  Curbe data for 16Align
69 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
70 //!           [in] Pointer to the 16Align State
71 //! \param    PVPHAL_16_ALIGN_RENDER_DATA pRenderData
72 //!           [in] Pointer to 16Align render data
73 //! \param    int32_t* piCurbeOffset
74 //!           [out] Pointer to curbe data offset
75 //! \return   MOS_STATUS
76 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
77 //!
VpHal_16AlignLoadStaticData(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData,int32_t * piCurbeOffset)78 MOS_STATUS VpHal_16AlignLoadStaticData(
79     PVPHAL_16_ALIGN_STATE           p16AlignState,
80     PVPHAL_16_ALIGN_RENDER_DATA     pRenderData,
81     int32_t*                        piCurbeOffset)
82 {
83     PRENDERHAL_INTERFACE                    pRenderHal;
84     MEDIA_WALKER_16ALIGN_STATIC_DATA        WalkerStatic;
85     MOS_STATUS                              eStatus;
86     int32_t                                 iCurbeLength;
87     float                                   fOffsetY, fOffsetX;
88     float                                   fShiftX, fShiftY;
89     float                                   fStepX, fStepY;
90 
91     VPHAL_RENDER_CHK_NULL(p16AlignState);
92     VPHAL_RENDER_CHK_NULL(p16AlignState->pRenderHal);
93     eStatus          = MOS_STATUS_SUCCESS;
94     pRenderHal       = p16AlignState->pRenderHal;
95 
96     // Set relevant static data
97     MOS_ZeroMemory(&WalkerStatic, sizeof(MEDIA_WALKER_16ALIGN_STATIC_DATA));
98     if (pRenderData->ScalingRatio_H < 0.0625f ||
99         pRenderData->ScalingRatio_V < 0.0625f)
100     {
101         WalkerStatic.DW0.Sampler_Index = THREED_SAMPLER_INDEX;
102         WalkerStatic.DW11.ScalingMode  = 0;
103         fShiftX = VPHAL_HW_LINEAR_SHIFT;
104         fShiftY = VPHAL_HW_LINEAR_SHIFT;
105     }
106     else
107     {
108         WalkerStatic.DW0.Sampler_Index = AVS_SAMPLER_INDEX;
109         WalkerStatic.DW11.ScalingMode  = 1;
110         fOffsetX = 0.0f;
111         fOffsetY = 0.0f;
112         fShiftX  = 0.0f;
113         fShiftY  = 0.0f;
114     }
115     switch (p16AlignState->pSource->Format)
116     {
117         case Format_NV12:
118             WalkerStatic.DW1.pSrcSurface_Y    = ALIGN16_SRC_Y_INDEX;
119             WalkerStatic.DW2.pSrcSurface_UV   = ALIGN16_SRC_UV_INDEX;
120             WalkerStatic.DW9.Input_Format     = 0;
121             break;
122         case Format_YUY2:
123             WalkerStatic.DW1.pSrcSurface      = ALIGN16_SRC_INDEX;
124             WalkerStatic.DW9.Input_Format     = 1;
125             break;
126         case Format_YV12:
127             WalkerStatic.DW1.pSrcSurface_Y    = ALIGN16_SRC_Y_INDEX;
128             WalkerStatic.DW2.pSrcSurface_U    = ALIGN16_SRC_U_INDEX;
129             WalkerStatic.DW3.pSrcSurface_V    = ALIGN16_SRC_V_INDEX;
130             WalkerStatic.DW9.Input_Format     = 2;
131             break;
132         case Format_A8R8G8B8:
133             WalkerStatic.DW1.pSrcSurface      = ALIGN16_SRC_INDEX;
134             WalkerStatic.DW9.Input_Format     = 3;
135             WalkerStatic.DW16.CSC_COEFF_0     = 0;
136             WalkerStatic.DW16.CSC_COEFF_1     = 0;
137             WalkerStatic.DW17.CSC_COEFF_2     = 0;
138             WalkerStatic.DW17.CSC_COEFF_3     = 0;
139             WalkerStatic.DW18.CSC_COEFF_4     = 0;
140             WalkerStatic.DW18.CSC_COEFF_5     = 0;
141             WalkerStatic.DW19.CSC_COEFF_6     = 0;
142             WalkerStatic.DW19.CSC_COEFF_7     = 0;
143             WalkerStatic.DW20.CSC_COEFF_8     = 0;
144             WalkerStatic.DW20.CSC_COEFF_9     = 0;
145             WalkerStatic.DW21.CSC_COEFF_10    = 0;
146             WalkerStatic.DW21.CSC_COEFF_11    = 0;
147             break;
148         default:
149             VPHAL_RENDER_ASSERTMESSAGE("16 align input format doesn't support.");
150             eStatus = MOS_STATUS_INVALID_PARAMETER;
151             break;
152     }
153 #if defined(LINUX) && !defined(WDDM_LINUX)
154     WalkerStatic.DW10.Output_Pitch            = p16AlignState->pTarget->OsResource.iPitch;
155     WalkerStatic.DW10.Output_Height           = p16AlignState->pTarget->OsResource.iHeight;
156 #endif
157     switch (p16AlignState->pTarget->Format)
158     {
159         case Format_NV12:
160             WalkerStatic.DW4.pOutSurface_Y    = ALIGN16_TRG_Y_INDEX;
161             WalkerStatic.DW5.pOutSurface_UV   = ALIGN16_TRG_UV_INDEX;
162             WalkerStatic.DW9.Output_Format    = 0;
163             break;
164         case Format_YUY2:
165             WalkerStatic.DW4.pOutSurface      = ALIGN16_TRG_INDEX;
166             WalkerStatic.DW9.Output_Format    = 1;
167             break;
168         case Format_YV12:
169             WalkerStatic.DW4.pOutSurface_Y    = ALIGN16_TRG_Y_INDEX;
170             WalkerStatic.DW5.pOutSurface_U    = ALIGN16_TRG_U_INDEX;
171             WalkerStatic.DW6.pOutSurface_V    = ALIGN16_TRG_V_INDEX;
172             WalkerStatic.DW9.Output_Format    = 2;
173             break;
174         default:
175             VPHAL_RENDER_ASSERTMESSAGE("16 align output format doesn't support.");
176             eStatus = MOS_STATUS_INVALID_PARAMETER;
177             break;
178     }
179     if (p16AlignState->pTarget->b16UsrPtr)
180     {
181         WalkerStatic.DW22.OutputMode = 0;
182     }
183     else
184     {
185         WalkerStatic.DW22.OutputMode = 1;
186     }
187     fStepX    = ((p16AlignState->pSource->rcSrc.right - p16AlignState->pSource->rcSrc.left) * 1.0f) /
188                  ((p16AlignState->pSource->rcDst.right - p16AlignState->pSource->rcDst.left) > 0 ?
189                   (p16AlignState->pSource->rcDst.right - p16AlignState->pSource->rcDst.left) : 1);
190     fStepY    = ((p16AlignState->pSource->rcSrc.bottom - p16AlignState->pSource->rcSrc.top) * 1.0f) /
191                  ((p16AlignState->pSource->rcDst.bottom - p16AlignState->pSource->rcDst.top) > 0 ?
192                   (p16AlignState->pSource->rcDst.bottom - p16AlignState->pSource->rcDst.top) : 1);
193     fOffsetX  = (float)p16AlignState->pSource->rcSrc.left;
194     fOffsetY  = (float)p16AlignState->pSource->rcSrc.top;
195     fShiftX  -= p16AlignState->pSource->rcDst.left;
196     fShiftY  -= p16AlignState->pSource->rcDst.top;
197     WalkerStatic.DW12.Original_X     = (fOffsetX + fShiftX * fStepX) / pRenderData->dwSurfStateWd;
198     WalkerStatic.DW13.Original_Y     = (fOffsetY + fShiftY * fStepY) / pRenderData->dwSurfStateHt;
199     WalkerStatic.DW22.Output_Top     = p16AlignState->pSource->rcDst.top;
200     WalkerStatic.DW23.Output_Bottom  = p16AlignState->pSource->rcDst.bottom - 1;
201     WalkerStatic.DW23.Output_Left    = p16AlignState->pSource->rcDst.left;
202     WalkerStatic.DW24.Output_Right   = p16AlignState->pSource->rcDst.right - 1;
203     WalkerStatic.DW24.bClearFlag     = 0;   // do not clear outside region of crop area.
204 
205     WalkerStatic.DW7.ScalingStep_H   = fStepX / pRenderData->dwSurfStateWd;
206     WalkerStatic.DW8.ScalingStep_V   = fStepY / pRenderData->dwSurfStateHt;
207 
208     iCurbeLength = sizeof(MEDIA_WALKER_16ALIGN_STATIC_DATA);
209 
210     *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
211         pRenderHal,
212         pRenderData->pMediaState,
213         &WalkerStatic,
214         iCurbeLength);
215 
216     if (*piCurbeOffset < 0)
217     {
218         eStatus = MOS_STATUS_UNKNOWN;
219         goto finish;
220     }
221 
222 finish:
223     VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
224     return eStatus;
225 }
226 
227 //!
228 //! \brief    16Align kernel setup
229 //! \details  Kernel setup for bitcopy
230 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
231 //!           [in] Pointer to the 16Align State
232 //! \param    PVPHAL_16_ALIGN_RENDER_DATA pRenderData
233 //!           [in] Pointer to 16Align render data
234 //! \return   MOS_STATUS
235 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
236 //!
VpHal_16AlignSetupKernel(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)237 MOS_STATUS VpHal_16AlignSetupKernel(
238     PVPHAL_16_ALIGN_STATE        p16AlignState,
239     PVPHAL_16_ALIGN_RENDER_DATA  pRenderData)
240 {
241     MOS_STATUS      eStatus;
242     Kdll_CacheEntry *pCacheEntryTable;
243 
244     VPHAL_RENDER_CHK_NULL(p16AlignState);
245     eStatus             = MOS_STATUS_SUCCESS;
246     pCacheEntryTable    =
247         p16AlignState->pKernelDllState->ComponentKernelCache.pCacheEntries;
248 
249     // Set the Kernel Parameters
250     pRenderData->pKernelParam   = p16AlignState->pKernelParamTable;
251     pRenderData->PerfTag        = VPHAL_NONE;
252 
253     // Set curbe & inline data size
254     pRenderData->iCurbeLength   = pRenderData->pKernelParam->CURBE_Length * GRF_SIZE;
255 
256     // Set Kernel entry
257     pRenderData->KernelEntry.iKUID     = IDR_VP_1_1_16aligned;
258     pRenderData->KernelEntry.iKCID     = -1;
259     pRenderData->KernelEntry.iSize     = pCacheEntryTable[IDR_VP_1_1_16aligned].iSize;
260     pRenderData->KernelEntry.pBinary   = pCacheEntryTable[IDR_VP_1_1_16aligned].pBinary;
261 
262 finish:
263     return eStatus;
264 }
265 
266 //!
267 //! \brief    Recalculate Sampler Avs 8x8 Horizontal/Vertical scaling table
268 //! \details  Recalculate Sampler Avs 8x8 Horizontal/Vertical scaling table
269 //! \param    MOS_FORMAT SrcFormat
270 //!           [in] Source Format
271 //! \param    float fScale
272 //!           [in] Horizontal or Vertical Scale Factor
273 //! \param    bool bVertical
274 //!           [in] true if Vertical Scaling, else Horizontal Scaling
275 //! \param    uint32_t dwChromaSiting
276 //!           [in] Chroma Siting
277 //! \param    bool bBalancedFilter
278 //!           [in] true if Gen9+, balanced filter
279 //! \param    bool b8TapAdaptiveEnable
280 //!           [in] true if 8Tap Adaptive Enable
281 //! \param    PVPHAL_AVS_PARAMS pAvsParams
282 //!           [in/out] Pointer to AVS Params
283 //! \return   MOS_STATUS
284 //!
VpHal_16AlignSamplerAvsCalcScalingTable(MOS_FORMAT SrcFormat,float fScale,bool bVertical,uint32_t dwChromaSiting,bool bBalancedFilter,bool b8TapAdaptiveEnable,PMHW_AVS_PARAMS pAvsParams)285 static MOS_STATUS VpHal_16AlignSamplerAvsCalcScalingTable(
286     MOS_FORMAT                      SrcFormat,
287     float                           fScale,
288     bool                            bVertical,
289     uint32_t                        dwChromaSiting,
290     bool                            bBalancedFilter,
291     bool                            b8TapAdaptiveEnable,
292     PMHW_AVS_PARAMS                 pAvsParams)
293 {
294     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
295     MHW_PLANE                       Plane;
296     int32_t                         iUvPhaseOffset;
297     uint32_t                        dwHwPhrase;
298     uint32_t                        YCoefTableSize;
299     uint32_t                        UVCoefTableSize;
300     float                           fScaleParam;
301     int32_t*                        piYCoefsParam;
302     int32_t*                        piUVCoefsParam;
303     float                           fHPStrength;
304 
305     VPHAL_RENDER_CHK_NULL(pAvsParams);
306     VPHAL_RENDER_CHK_NULL(pAvsParams->piYCoefsY);
307     VPHAL_RENDER_CHK_NULL(pAvsParams->piYCoefsX);
308     VPHAL_RENDER_CHK_NULL(pAvsParams->piUVCoefsY);
309     VPHAL_RENDER_CHK_NULL(pAvsParams->piUVCoefsX);
310 
311     if (bBalancedFilter)
312     {
313         YCoefTableSize      = POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G9;
314         UVCoefTableSize     = POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G9;
315         dwHwPhrase          = NUM_HW_POLYPHASE_TABLES_G9;
316     }
317     else
318     {
319         YCoefTableSize      = POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G8;
320         UVCoefTableSize     = POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G8;
321         dwHwPhrase          = MHW_NUM_HW_POLYPHASE_TABLES;
322     }
323 
324     fHPStrength = 0.0F;
325     piYCoefsParam   = bVertical ? pAvsParams->piYCoefsY : pAvsParams->piYCoefsX;
326     piUVCoefsParam  = bVertical ? pAvsParams->piUVCoefsY : pAvsParams->piUVCoefsX;
327     fScaleParam     = bVertical ? pAvsParams->fScaleY : pAvsParams->fScaleX;
328 
329     // Recalculate Horizontal or Vertical scaling table
330     if (SrcFormat != pAvsParams->Format || fScale != fScaleParam)
331     {
332         MOS_ZeroMemory(piYCoefsParam, YCoefTableSize);
333         MOS_ZeroMemory(piUVCoefsParam, UVCoefTableSize);
334 
335         // 4-tap filtering for RGB format G-channel if 8tap adaptive filter is not enabled.
336         Plane = (IS_RGB32_FORMAT(SrcFormat) && !b8TapAdaptiveEnable) ? MHW_U_PLANE : MHW_Y_PLANE;
337         if (bVertical)
338         {
339             pAvsParams->fScaleY = fScale;
340         }
341         else
342         {
343             pAvsParams->fScaleX = fScale;
344         }
345 
346         // For 1x scaling in horizontal direction, use special coefficients for filtering
347         // we don't do this when bForcePolyPhaseCoefs flag is set
348         if (fScale == 1.0F && !pAvsParams->bForcePolyPhaseCoefs)
349         {
350             VPHAL_RENDER_CHK_STATUS(Mhw_SetNearestModeTable(
351                 piYCoefsParam,
352                 Plane,
353                 bBalancedFilter));
354             // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
355             // So, coefficient for UV/RB channels caculation can be passed
356             if (!b8TapAdaptiveEnable)
357             {
358                 VPHAL_RENDER_CHK_STATUS(Mhw_SetNearestModeTable(
359                     piUVCoefsParam,
360                     MHW_U_PLANE,
361                     bBalancedFilter));
362             }
363         }
364         else
365         {
366             // Clamp the Scaling Factor if > 1.0x
367             fScale = MOS_MIN(1.0F, fScale);
368 
369             VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesY(
370                 piYCoefsParam,
371                 fScale,
372                 Plane,
373                 SrcFormat,
374                 fHPStrength,
375                 true,
376                 dwHwPhrase,
377                 0));
378 
379             // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
380             // So, coefficient for UV/RB channels caculation can be passed
381             if (!b8TapAdaptiveEnable)
382             {
383                 if (!bBalancedFilter)
384                 {
385                     VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesY(
386                         piUVCoefsParam,
387                         fScale,
388                         MHW_U_PLANE,
389                         SrcFormat,
390                         fHPStrength,
391                         true,
392                         dwHwPhrase,
393                         0));
394                 }
395                 else
396                 {
397                     // If Chroma Siting info is present
398                     if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_TOP : MHW_CHROMA_SITING_HORZ_LEFT))
399                     {
400                         // No Chroma Siting
401                         VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesUV(
402                             piUVCoefsParam,
403                             2.0F,
404                             fScale));
405                     }
406                     else
407                     {
408                         // Chroma siting offset needs to be added
409                         if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_CENTER : MHW_CHROMA_SITING_HORZ_CENTER))
410                         {
411                             iUvPhaseOffset = MOS_UF_ROUND(0.5F * 16.0F);   // U0.4
412                         }
413                         else //if (ChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_BOTTOM : MHW_CHROMA_SITING_HORZ_RIGHT))
414                         {
415                             iUvPhaseOffset = MOS_UF_ROUND(1.0F * 16.0F);   // U0.4
416                         }
417 
418                         VPHAL_RENDER_CHK_STATUS(Mhw_CalcPolyphaseTablesUVOffset(
419                             piUVCoefsParam,
420                             3.0F,
421                             fScale,
422                             iUvPhaseOffset));
423                     }
424                 }
425             }
426         }
427     }
428 
429 finish:
430     return eStatus;
431 }
432 
433 //!
434 //! \brief    Set Sampler Avs 8x8 Table for LGCA
435 //! \details  Set Sampler Avs 8x8 Table for LGCA
436 //! \param    PRENDERHAL_INTERFACE pRenderHal
437 //!           [in] Pointer to RenderHal Interface Structure
438 //! \param    PMHW_SAMPLER_STATE_PARAM pSamplerStateParams
439 //!           [in] Pointer to Sampler State Params
440 //! \param    PMHW_AVS_PARAMS pAvsParams
441 //!           [in/out] Pointer to AVS Params
442 //! \param    MOS_FORMAT SrcFormat
443 //!           [in] Source Format
444 //! \return   MOS_STATUS
445 //!
VpHal_16AlignSetSamplerAvsTableParam(PRENDERHAL_INTERFACE pRenderHal,PMHW_SAMPLER_STATE_PARAM pSamplerStateParams,PMHW_AVS_PARAMS pAvsParams,MOS_FORMAT SrcFormat,float fScaleX,float fScaleY,uint32_t dwChromaSiting)446 static MOS_STATUS VpHal_16AlignSetSamplerAvsTableParam(
447     PRENDERHAL_INTERFACE            pRenderHal,
448     PMHW_SAMPLER_STATE_PARAM        pSamplerStateParams,
449     PMHW_AVS_PARAMS                 pAvsParams,
450     MOS_FORMAT                      SrcFormat,
451     float                           fScaleX,
452     float                           fScaleY,
453     uint32_t                        dwChromaSiting)
454 {
455     MOS_STATUS                   eStatus = MOS_STATUS_SUCCESS;
456     bool                         bBalancedFilter;
457     PMHW_SAMPLER_AVS_TABLE_PARAM pMhwSamplerAvsTableParam;
458 
459     VPHAL_RENDER_CHK_NULL(pRenderHal);
460     VPHAL_RENDER_CHK_NULL(pSamplerStateParams);
461     VPHAL_RENDER_CHK_NULL(pAvsParams);
462     if (pAvsParams->piUVCoefsX == nullptr || pAvsParams->piYCoefsY  == nullptr ||
463         pAvsParams->piYCoefsX  == nullptr || pAvsParams->piUVCoefsY == nullptr ||
464         pAvsParams             == nullptr)
465     {
466         VPHAL_RENDER_ASSERTMESSAGE("meet null ptr!");
467     }
468 
469     pMhwSamplerAvsTableParam = pSamplerStateParams->Avs.pMhwSamplerAvsTableParam;
470 
471     pMhwSamplerAvsTableParam->b8TapAdaptiveEnable         = pSamplerStateParams->Avs.b8TapAdaptiveEnable;
472     pMhwSamplerAvsTableParam->byteTransitionArea8Pixels   = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
473     pMhwSamplerAvsTableParam->byteTransitionArea4Pixels   = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
474     pMhwSamplerAvsTableParam->byteMaxDerivative8Pixels    = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
475     pMhwSamplerAvsTableParam->byteMaxDerivative4Pixels    = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
476     pMhwSamplerAvsTableParam->byteDefaultSharpnessLevel   = MEDIASTATE_AVS_SHARPNESS_LEVEL_SHARP;
477 
478     // Enable Adaptive Filtering, if it is being upscaled
479     // in either direction. we must check for this before clamping the SF.
480     if ((IS_YUV_FORMAT(SrcFormat) && (fScaleX > 1.0F || fScaleY > 1.0F)) ||
481         pMhwSamplerAvsTableParam->b8TapAdaptiveEnable)
482     {
483         pMhwSamplerAvsTableParam->bBypassXAdaptiveFiltering = false;
484         pMhwSamplerAvsTableParam->bBypassYAdaptiveFiltering = false;
485         if (pMhwSamplerAvsTableParam->b8TapAdaptiveEnable)
486         {
487             pMhwSamplerAvsTableParam->bAdaptiveFilterAllChannels = true;
488 
489             if (IS_RGB_FORMAT(SrcFormat))
490             {
491                 pMhwSamplerAvsTableParam->bEnableRGBAdaptive     = true;
492             }
493         }
494     }
495     else
496     {
497         pMhwSamplerAvsTableParam->bBypassXAdaptiveFiltering = true;
498         pMhwSamplerAvsTableParam->bBypassYAdaptiveFiltering = true;
499     }
500 
501     // No changes to AVS parameters -> skip
502     if (SrcFormat == pAvsParams->Format &&
503         fScaleX == pAvsParams->fScaleX &&
504         fScaleY == pAvsParams->fScaleY)
505     {
506         goto finish;
507     }
508 
509     // AVS Coefficients don't change for Scaling Factors > 1.0x
510     // Hence recalculation is avoided
511     if (fScaleX > 1.0F && pAvsParams->fScaleX > 1.0F)
512     {
513         pAvsParams->fScaleX = fScaleX;
514     }
515 
516     // AVS Coefficients don't change for Scaling Factors > 1.0x
517     // Hence recalculation is avoided
518     if (fScaleY > 1.0F && pAvsParams->fScaleY > 1.0F)
519     {
520         pAvsParams->fScaleY = fScaleY;
521     }
522 
523     bBalancedFilter = true;
524     // Recalculate Horizontal scaling table
525     VPHAL_HW_CHK_STATUS(VpHal_16AlignSamplerAvsCalcScalingTable(
526         SrcFormat,
527         fScaleX,
528         false,
529         dwChromaSiting,
530         bBalancedFilter,
531         pMhwSamplerAvsTableParam->b8TapAdaptiveEnable ? true : false,
532         pAvsParams));
533 
534     // Recalculate Vertical scaling table
535     VPHAL_HW_CHK_STATUS(VpHal_16AlignSamplerAvsCalcScalingTable(
536         SrcFormat,
537         fScaleY,
538         true,
539         dwChromaSiting,
540         bBalancedFilter,
541         pMhwSamplerAvsTableParam->b8TapAdaptiveEnable ? true : false,
542         pAvsParams));
543 
544     pMhwSamplerAvsTableParam->bIsCoeffExtraEnabled = true;
545     // Save format used to calculate AVS parameters
546     pAvsParams->Format                             = SrcFormat;
547     pMhwSamplerAvsTableParam->b4TapGY              = (IS_RGB32_FORMAT(SrcFormat) && !pMhwSamplerAvsTableParam->b8TapAdaptiveEnable);
548     pMhwSamplerAvsTableParam->b4TapRBUV            = (!pMhwSamplerAvsTableParam->b8TapAdaptiveEnable);
549 
550     VpHal_RenderCommonSetAVSTableParam(pAvsParams, pMhwSamplerAvsTableParam);
551 
552 finish:
553     return eStatus;
554 }
555 
556 //!
557 //! \brief    16Align setup HW states
558 //! \details  Setup HW states for 16Align
559 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
560 //!           [in] Pointer to the 16Align State
561 //! \param    PVPHAL_16_ALIGN_RENDER_DATA pRenderData
562 //!           [in/out] Pointer to 16Align render data
563 //! \return   MOS_STATUS
564 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
565 //!
VpHal_16AlignSetSamplerStates(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)566 MOS_STATUS VpHal_16AlignSetSamplerStates(
567     PVPHAL_16_ALIGN_STATE        p16AlignState,
568     PVPHAL_16_ALIGN_RENDER_DATA  pRenderData)
569 {
570     MOS_STATUS                  eStatus;
571     PRENDERHAL_INTERFACE        pRenderHal;
572     PMHW_SAMPLER_STATE_PARAM    pSamplerStateParams;
573 
574     VPHAL_PUBLIC_CHK_NULL(p16AlignState);
575     VPHAL_PUBLIC_CHK_NULL(pRenderData);
576 
577     pRenderHal = p16AlignState->pRenderHal;
578 
579     VPHAL_PUBLIC_CHK_NULL(pRenderHal);
580     pSamplerStateParams                          = &pRenderData->SamplerStateParams;
581     pSamplerStateParams->bInUse                  = true;
582 
583     if (pRenderData->ScalingRatio_H < 0.0625f ||
584         pRenderData->ScalingRatio_V < 0.0625f)
585     {
586         p16AlignState->pSource->bUseSampleUnorm      = true;
587         pSamplerStateParams->SamplerType             = MHW_SAMPLER_TYPE_3D;
588         pSamplerStateParams->Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_BILINEAR;
589         pSamplerStateParams->Unorm.AddressU          = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
590         pSamplerStateParams->Unorm.AddressV          = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
591         pSamplerStateParams->Unorm.AddressW          = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
592     }
593     else
594     {
595         pSamplerStateParams->SamplerType             = MHW_SAMPLER_TYPE_AVS;
596         pSamplerStateParams->Avs.AvsType             = false;
597         pSamplerStateParams->Avs.bEnableIEF          = false;
598         pSamplerStateParams->Avs.b8TapAdaptiveEnable = false;
599         pSamplerStateParams->Avs.bHdcDwEnable        = false;
600         pSamplerStateParams->Avs.bEnableAVS          = true;
601         pSamplerStateParams->Avs.WeakEdgeThr         = DETAIL_WEAK_EDGE_THRESHOLD;
602         pSamplerStateParams->Avs.StrongEdgeThr       = DETAIL_STRONG_EDGE_THRESHOLD;
603         pSamplerStateParams->Avs.StrongEdgeWght      = DETAIL_STRONG_EDGE_WEIGHT;
604         pSamplerStateParams->Avs.RegularWght         = DETAIL_REGULAR_EDGE_WEIGHT;
605         pSamplerStateParams->Avs.NonEdgeWght         = DETAIL_NON_EDGE_WEIGHT;
606         pSamplerStateParams->Avs.pMhwSamplerAvsTableParam = &p16AlignState->mhwSamplerAvsTableParam;
607 
608         VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetSamplerAvsTableParam(
609                         pRenderHal,
610                         pSamplerStateParams,
611                         pRenderData->pAVSParameters,
612                         p16AlignState->pSource->Format,
613                         pRenderData->ScalingRatio_H,
614                         pRenderData->ScalingRatio_V,
615                         MHW_CHROMA_SITING_HORZ_LEFT | MHW_CHROMA_SITING_VERT_TOP));
616     }
617 
618 
619     eStatus = pRenderHal->pfnSetSamplerStates(
620         pRenderHal,
621         pRenderData->iMediaID,
622         pSamplerStateParams,
623         1);
624 
625 finish:
626     return eStatus;
627 }
628 
629 //!
630 //! \brief    16Align setup HW states
631 //! \details  Setup HW states for 16Align
632 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
633 //!           [in] Pointer to the 16Align State
634 //! \param    PVPHAL_16_ALIGN_RENDER_DATA pRenderData
635 //!           [in/out] Pointer to 16Align render data
636 //! \return   MOS_STATUS
637 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
638 //!
VpHal_16AlignSetupHwStates(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)639 MOS_STATUS VpHal_16AlignSetupHwStates(
640     PVPHAL_16_ALIGN_STATE        p16AlignState,
641     PVPHAL_16_ALIGN_RENDER_DATA  pRenderData)
642 {
643     PRENDERHAL_INTERFACE        pRenderHal;
644     int32_t                     iKrnAllocation;
645     int32_t                     iCurbeOffset;
646     MOS_STATUS                  eStatus;
647     int32_t                     iThreadCount;
648     MHW_KERNEL_PARAM            MhwKernelParam;
649 
650     VPHAL_RENDER_CHK_NULL(p16AlignState);
651     VPHAL_RENDER_CHK_NULL(pRenderData);
652 
653     eStatus                     = MOS_STATUS_SUCCESS;
654     pRenderHal                  = p16AlignState->pRenderHal;
655     VPHAL_RENDER_CHK_NULL(pRenderHal);
656 
657     // Allocate and reset media state
658     pRenderData->pMediaState = pRenderHal->pfnAssignMediaState(pRenderHal, (RENDERHAL_COMPONENT)RENDERHAL_COMPONENT_16ALIGN);
659     VPHAL_RENDER_CHK_NULL(pRenderData->pMediaState);
660 
661     // Allocate and reset SSH instance
662     VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnAssignSshInstance(pRenderHal));
663 
664     // Assign and Reset Binding Table
665     VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnAssignBindingTable(
666             pRenderHal,
667             &pRenderData->iBindingTable));
668 
669     // Setup surface states
670     VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnSetupSurfaceStates(
671             p16AlignState,
672             pRenderData));
673 
674     // load static data
675     VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnLoadStaticData(
676             p16AlignState,
677             pRenderData,
678             &iCurbeOffset));
679 
680     if (p16AlignState->pPerfData->CompMaxThreads.bEnabled)
681     {
682         iThreadCount = p16AlignState->pPerfData->CompMaxThreads.uiVal;
683     }
684     else
685     {
686         iThreadCount = pRenderData->pKernelParam->Thread_Count;
687     }
688 
689     // Setup VFE State params.
690     VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnSetVfeStateParams(
691         pRenderHal,
692         MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
693         iThreadCount,
694         pRenderData->iCurbeLength,
695         pRenderData->iInlineLength,
696         nullptr));
697 
698     // Load kernel to GSH
699     INIT_MHW_KERNEL_PARAM(MhwKernelParam, &pRenderData->KernelEntry);
700     iKrnAllocation = pRenderHal->pfnLoadKernel(
701         pRenderHal,
702         pRenderData->pKernelParam,
703         &MhwKernelParam,
704         nullptr);
705 
706     if (iKrnAllocation < 0)
707     {
708         eStatus = MOS_STATUS_UNKNOWN;
709         goto finish;
710     }
711 
712     // Allocate Media ID, link to kernel
713     pRenderData->iMediaID = pRenderHal->pfnAllocateMediaID(
714         pRenderHal,
715         iKrnAllocation,
716         pRenderData->iBindingTable,
717         iCurbeOffset,
718         (pRenderData->pKernelParam->CURBE_Length << 5),
719         0,
720         nullptr);
721 
722     if (pRenderData->iMediaID < 0)
723     {
724         eStatus = MOS_STATUS_UNKNOWN;
725         goto finish;
726     }
727 
728     // Set Sampler states for this Media ID
729     VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnSetSamplerStates(
730         p16AlignState,
731         pRenderData));
732 
733 finish:
734     VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
735     return eStatus;
736 }
737 
738 //!
739 //! \brief    16Align media walker setup
740 //! \details  Media walker setup for bitcopy
741 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
742 //!           [in] Pointer to the 16Align State
743 //! \param    PVPHAL_16_ALIGN_RENDER_DATA pRenderData
744 //!           [in] Pointer to 16Align render data
745 //! \param    PMHW_WALKER_PARAMS pWalkerParams
746 //!           [in/out] Pointer to Walker params
747 //! \return   MOS_STATUS
748 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
749 //!
VpHal_16AlignRenderMediaWalker(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData,PMHW_WALKER_PARAMS pWalkerParams)750 MOS_STATUS VpHal_16AlignRenderMediaWalker(
751     PVPHAL_16_ALIGN_STATE        p16AlignState,
752     PVPHAL_16_ALIGN_RENDER_DATA  pRenderData,
753     PMHW_WALKER_PARAMS               pWalkerParams)
754 {
755     PRENDERHAL_INTERFACE            pRenderHal;
756     uint32_t                        dwWidth;
757     uint32_t                        dwHeight;
758     MOS_STATUS                      eStatus;
759 
760     eStatus     = MOS_STATUS_SUCCESS;
761     pRenderHal  = p16AlignState->pRenderHal;
762 
763     // Calculate how many media object commands are needed.
764     dwWidth  = MOS_ALIGN_CEIL((p16AlignState->pTarget->rcDst.right -
765                            p16AlignState->pTarget->rcDst.left),
766                            pRenderData->pKernelParam->block_width);
767     dwHeight = MOS_ALIGN_CEIL((p16AlignState->pTarget->rcDst.bottom -
768                            p16AlignState->pTarget->rcDst.top),
769                            pRenderData->pKernelParam->block_height);
770 
771     pRenderData->iBlocksX = dwWidth  / pRenderData->pKernelParam->block_width;
772     pRenderData->iBlocksY = dwHeight / pRenderData->pKernelParam->block_height;
773 
774     // Set walker cmd params - Rasterscan
775     MOS_ZeroMemory(pWalkerParams, sizeof(*pWalkerParams));
776 
777     pWalkerParams->InterfaceDescriptorOffset    = pRenderData->iMediaID;
778 
779     pWalkerParams->dwGlobalLoopExecCount        = 1;
780     pWalkerParams->dwLocalLoopExecCount         = pRenderData->iBlocksY - 1;
781 
782     pWalkerParams->GlobalResolution.x           = pRenderData->iBlocksX;
783     pWalkerParams->GlobalResolution.y           = pRenderData->iBlocksY;
784 
785     pWalkerParams->GlobalStart.x                = 0;
786     pWalkerParams->GlobalStart.y                = 0;
787 
788     pWalkerParams->GlobalOutlerLoopStride.x     = pRenderData->iBlocksX;
789     pWalkerParams->GlobalOutlerLoopStride.y     = 0;
790 
791     pWalkerParams->GlobalInnerLoopUnit.x        = 0;
792     pWalkerParams->GlobalInnerLoopUnit.y        = pRenderData->iBlocksY;
793 
794     pWalkerParams->BlockResolution.x            = pRenderData->iBlocksX;
795     pWalkerParams->BlockResolution.y            = pRenderData->iBlocksY;
796 
797     pWalkerParams->LocalStart.x                 = 0;
798     pWalkerParams->LocalStart.y                 = 0;
799 
800     pWalkerParams->LocalEnd.x                   = pRenderData->iBlocksX - 1;
801     pWalkerParams->LocalEnd.y                   = 0;
802 
803     pWalkerParams->LocalOutLoopStride.x         = 0;
804     pWalkerParams->LocalOutLoopStride.y         = 1;
805 
806     pWalkerParams->LocalInnerLoopUnit.x         = 1;
807     pWalkerParams->LocalInnerLoopUnit.y         = 0;
808 
809     return eStatus;
810 }
811 
812 //!
813 //! \brief    16Align renderer
814 //! \details  Renderer function for 16Align
815 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
816 //!           [in] Pointer to the 16Align State
817 //! \param    PVPHAL_RENDER_PARAMS pRenderParams
818 //!           [in] Pointer to 16Align render params
819 //! \return   MOS_STATUS
820 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
821 //!
VpHal_16AlignRender(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_RENDER_PARAMS pRenderParams)822 MOS_STATUS VpHal_16AlignRender(
823     PVPHAL_16_ALIGN_STATE    p16AlignState,
824     PVPHAL_RENDER_PARAMS     pRenderParams)
825 {
826     MOS_STATUS                              eStatus;
827     PRENDERHAL_INTERFACE                    pRenderHal;
828     PMOS_INTERFACE                          pOsInterface;
829     MHW_WALKER_PARAMS                       WalkerParams;
830     VPHAL_16_ALIGN_RENDER_DATA              RenderData;
831     PRENDERHAL_L3_CACHE_SETTINGS            pCacheSettings = nullptr;
832     uint32_t                                dwInputRegionHeight;
833     uint32_t                                dwInputRegionWidth;
834     uint32_t                                dwOutputRegionHeight;
835     uint32_t                                dwOutputRegionWidth;
836 
837     VPHAL_RENDER_ASSERT(p16AlignState);
838     VPHAL_RENDER_ASSERT(pRenderParams);
839     VPHAL_RENDER_ASSERT(p16AlignState->pOsInterface);
840     VPHAL_RENDER_ASSERT(p16AlignState->pRenderHal);
841     VPHAL_RENDER_ASSERT(p16AlignState->pPerfData);
842 
843     eStatus                     = MOS_STATUS_SUCCESS;
844     pOsInterface                = p16AlignState->pOsInterface;
845     pRenderHal                  = p16AlignState->pRenderHal;
846     MOS_ZeroMemory(&RenderData, sizeof(RenderData));
847 
848     // Reset reporting
849     p16AlignState->Reporting.InitReportValue();
850 
851     // Reset states before rendering
852     pOsInterface->pfnResetOsStates(pOsInterface);
853     VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnReset(pRenderHal));
854     pOsInterface->pfnResetPerfBufferID(pOsInterface);   // reset once per frame
855 
856     VPHAL_DBG_STATE_DUMPPER_SET_CURRENT_STAGE(VPHAL_DBG_STAGE_COMP);
857 
858     // Configure cache settings for this render operation
859     pCacheSettings      = &pRenderHal->L3CacheSettings;
860     MOS_ZeroMemory(pCacheSettings, sizeof(*pCacheSettings));
861     pCacheSettings->bOverride                  = true;
862     pCacheSettings->bL3CachingEnabled          = p16AlignState->SurfMemObjCtl.bL3CachingEnabled;
863 
864     if (p16AlignState->pPerfData->L3SQCReg1Override.bEnabled)
865     {
866         pCacheSettings->bSqcReg1Override       = true;
867         pCacheSettings->dwSqcReg1              = p16AlignState->pPerfData->L3SQCReg1Override.uiVal;
868     }
869 
870     if (p16AlignState->pPerfData->L3CntlReg2Override.bEnabled)
871     {
872         pCacheSettings->bCntlReg2Override      = true;
873         pCacheSettings->dwCntlReg2             = p16AlignState->pPerfData->L3CntlReg2Override.uiVal;
874     }
875 
876     if (p16AlignState->pPerfData->L3CntlReg3Override.bEnabled)
877     {
878         pCacheSettings->bCntlReg3Override      = true;
879         pCacheSettings->dwCntlReg3             = p16AlignState->pPerfData->L3CntlReg3Override.uiVal;
880     }
881 
882     if (p16AlignState->pPerfData->L3LRA1RegOverride.bEnabled)
883     {
884         pCacheSettings->bLra1RegOverride       = true;
885         pCacheSettings->dwLra1Reg              = p16AlignState->pPerfData->L3LRA1RegOverride.uiVal;
886     }
887 
888     // Setup Source/Target surface and get the Source width/height for
889     p16AlignState->pSource           = pRenderParams->pSrc[0];
890     p16AlignState->pTarget           = pRenderParams->pTarget[0];
891     dwInputRegionWidth               = p16AlignState->pSource->rcSrc.right  - p16AlignState->pSource->rcSrc.left;
892     dwInputRegionHeight              = p16AlignState->pSource->rcSrc.bottom - p16AlignState->pSource->rcSrc.top;
893     dwOutputRegionWidth              = p16AlignState->pSource->rcDst.right  - p16AlignState->pSource->rcDst.left;
894     dwOutputRegionHeight             = p16AlignState->pSource->rcDst.bottom - p16AlignState->pSource->rcDst.top;
895 
896     RenderData.ScalingRatio_H       = (float)dwOutputRegionWidth / (float)dwInputRegionWidth;
897     RenderData.ScalingRatio_V       = (float)dwOutputRegionHeight / (float)dwInputRegionHeight;
898 
899     RenderData.pAVSParameters = &p16AlignState->AVSParameters;
900     RenderData.SamplerStateParams.Avs.pMhwSamplerAvsTableParam = &RenderData.mhwSamplerAvsTableParam;
901 
902     p16AlignState->pKernelParamTable = (PRENDERHAL_KERNEL_PARAM)&g_16Align_MW_KernelParam[0];
903 
904     // Ensure input can be read
905     pOsInterface->pfnSyncOnResource(
906         pOsInterface,
907         &p16AlignState->pSource->OsResource,
908         pOsInterface->CurrentGpuContextOrdinal,
909         false);
910 
911     // Ensure the output can be written
912     pOsInterface->pfnSyncOnResource(
913         pOsInterface,
914         &p16AlignState->pTarget->OsResource,
915         pOsInterface->CurrentGpuContextOrdinal,
916         true);
917 
918     // Setup copy kernel
919     VPHAL_RENDER_CHK_STATUS(p16AlignState->pfnSetupKernel(
920             p16AlignState,
921             &RenderData));
922 
923     // Submit HW States and Commands
924     VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetupHwStates(
925             p16AlignState,
926             &RenderData));
927 
928     // Set perftag information
929     pOsInterface->pfnResetPerfBufferID(pOsInterface);
930     pOsInterface->pfnSetPerfTag(pOsInterface, RenderData.PerfTag);
931 
932     VPHAL_RENDER_CHK_STATUS(VpHal_16AlignRenderMediaWalker(
933             p16AlignState,
934             &RenderData,
935             &WalkerParams));
936 
937     VPHAL_DBG_STATE_DUMPPER_DUMP_GSH(pRenderHal);
938     VPHAL_DBG_STATE_DUMPPER_DUMP_SSH(pRenderHal);
939 
940     VPHAL_RENDER_CHK_STATUS(VpHal_RndrSubmitCommands(
941         pRenderHal,
942         nullptr,
943         p16AlignState->bNullHwRender16Align,
944         &WalkerParams,
945         nullptr,
946         &p16AlignState->StatusTableUpdateParams,
947         kernelUserPtr,
948         0,
949         nullptr,
950         true));
951 
952 finish:
953     MOS_ZeroMemory(pCacheSettings, sizeof(*pCacheSettings));
954     VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
955     VPHAL_RENDER_NORMALMESSAGE("finished UsrPtr process!");
956     return eStatus;
957 }
958 
959 //!
960 //! \brief    16Align Destroy state
961 //! \details  Function to destroy 16Align state
962 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
963 //!           [in] Pointer to the 16Align State
964 //! \return   MOS_STATUS
965 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
966 //!
VpHal_16AlignDestroy(PVPHAL_16_ALIGN_STATE p16AlignState)967 MOS_STATUS VpHal_16AlignDestroy(
968     PVPHAL_16_ALIGN_STATE    p16AlignState)
969 {
970     MOS_STATUS eStatus;
971     eStatus = MOS_STATUS_SUCCESS;
972     VPHAL_RENDER_CHK_NULL(p16AlignState);
973     VpHal_RenderDestroyAVSParams(&p16AlignState->AVSParameters);
974     MOS_UNUSED(p16AlignState);
975 
976 finish:
977     return eStatus;
978 }
979 
980 //!
981 //! \brief    16Align kernel state Initializations
982 //! \details  Kernel state Initializations for 16Align
983 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
984 //!           [in] Pointer to the 16Align State
985 //! \param    const VphalSettings* pSettings
986 //!           [in] Pointer to VPHAL Setting
987 //! \param    Kdll_State pKernelDllState
988 //!           [in/out] Pointer to bitcopy kernel Dll state
989 //! \return   MOS_STATUS
990 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
991 //!
VpHal_16AlignInitialize(PVPHAL_16_ALIGN_STATE p16AlignState,const VphalSettings * pSettings,Kdll_State * pKernelDllState)992 MOS_STATUS VpHal_16AlignInitialize(
993     PVPHAL_16_ALIGN_STATE    p16AlignState,
994     const VphalSettings      *pSettings,
995     Kdll_State               *pKernelDllState)
996 {
997     MOS_NULL_RENDERING_FLAGS  NullRenderingFlags;
998 
999     VPHAL_RENDER_ASSERT(p16AlignState);
1000     VPHAL_RENDER_ASSERT(p16AlignState->pOsInterface);
1001 
1002     NullRenderingFlags            =
1003                     p16AlignState->pOsInterface->pfnGetNullHWRenderFlags(p16AlignState->pOsInterface);
1004     p16AlignState->bNullHwRender16Align =
1005                     NullRenderingFlags.VPLgca ||
1006                     NullRenderingFlags.VPGobal;
1007 
1008     // Setup interface to KDLL
1009     p16AlignState->pKernelDllState   = pKernelDllState;
1010     VpHal_RenderInitAVSParams(&p16AlignState->AVSParameters,
1011             POLYPHASE_Y_COEFFICIENT_TABLE_SIZE_G9,
1012             POLYPHASE_UV_COEFFICIENT_TABLE_SIZE_G9);
1013 
1014     return MOS_STATUS_SUCCESS;
1015 }
1016 
1017 //!
1018 //! \brief    Set Surface for HW Access
1019 //! \details  Common Function for setting up surface state, need to use this function
1020 //!           if render would use CP HM
1021 //! \param    [in] bSrc
1022 //!           indicate the surface is input source.
1023 //! \param    [in] pRenderHal
1024 //!           Pointer to RenderHal Interface Structure
1025 //! \param    [in] pSurface
1026 //!           Pointer to Surface
1027 //! \param    [in] pRenderSurface
1028 //!           Pointer to Render Surface
1029 //! \param    [in] pSurfaceParams
1030 //!           Pointer to RenderHal Surface Params
1031 //! \param    [in] PVPHAL_16_ALIGN_RENDER_DATA
1032 //!           Pointer to Rendering data
1033 //! \return   MOS_STATUS
1034 //!           MOS_STATUS_SUCCESS if success. Error code otherwise
1035 //!
VpHal_16AlignSetupSurfaceStatesInt(bool bSrc,PRENDERHAL_INTERFACE pRenderHal,PVPHAL_SURFACE pSurface,PRENDERHAL_SURFACE pRenderSurface,PRENDERHAL_SURFACE_STATE_PARAMS pSurfaceParams,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)1036 MOS_STATUS VpHal_16AlignSetupSurfaceStatesInt(
1037     bool                                bSrc,
1038     PRENDERHAL_INTERFACE                pRenderHal,
1039     PVPHAL_SURFACE                      pSurface,
1040     PRENDERHAL_SURFACE                  pRenderSurface,
1041     PRENDERHAL_SURFACE_STATE_PARAMS     pSurfaceParams,
1042     PVPHAL_16_ALIGN_RENDER_DATA         pRenderData)
1043 {
1044     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
1045     PRENDERHAL_SURFACE_STATE_ENTRY      pSurfaceEntry;
1046     MOS_FORMAT                          format  = pSurface->Format;
1047     uint32_t                            width   = pSurface->dwWidth;
1048 #if defined(LINUX) && !defined(WDDM_LINUX)
1049     uint32_t                            dwSize  = pSurface->dwHeight * pSurface->OsResource.iPitch;
1050 #else
1051     uint32_t                            dwSize  = pSurface->dwHeight * pSurface->dwPitch;
1052 #endif
1053 
1054     if (!bSrc && pSurface->b16UsrPtr)
1055     {
1056         // system linear surface.
1057         // reset the output surface format as Raw and calculate the surface size.
1058         pSurface->Format      = Format_RAW;
1059         switch (format)
1060         {
1061             case Format_NV12:
1062                 for (int i = 0; i < 2; i++)
1063                 {
1064                     pSurface->dwWidth = (i==0)?dwSize:dwSize/2;
1065                     VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1066                         pRenderHal,
1067                         pSurface,
1068                         pRenderSurface,
1069                         pSurfaceParams,
1070                         pRenderData->iBindingTable,
1071                         ((i==0)?ALIGN16_TRG_Y_INDEX:ALIGN16_TRG_UV_INDEX),
1072                         bSrc?false:true));
1073                     // add UV offset which was missed in raw buffer common configuration.
1074                     if (i > 0)
1075                     {
1076                         pSurfaceEntry   = &pRenderHal->pStateHeap->pSurfaceEntry[pRenderHal->pStateHeap->iCurrentSurfaceState-1]; // fetch the surface plane
1077                         pSurfaceEntry->SurfaceToken.DW2.SurfaceOffset = dwSize;
1078                     }
1079                 }
1080                 break;
1081             case Format_YUY2:
1082                 pSurface->dwWidth = dwSize * 2;
1083                 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1084                     pRenderHal,
1085                     pSurface,
1086                     pRenderSurface,
1087                     pSurfaceParams,
1088                     pRenderData->iBindingTable,
1089                     ALIGN16_TRG_INDEX,
1090                     bSrc?false:true));
1091                 break;
1092             case Format_YV12:
1093                 // YV12 should be allocated as 3 linear buffer for every Y U V output plane.
1094                 for (int i = 0; i < 3; i++)
1095                 {
1096                     pSurface->dwWidth = (i == 0)?dwSize:dwSize/4;
1097                     VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
1098                         pRenderHal,
1099                         pSurface,
1100                         pRenderSurface,
1101                         pSurfaceParams,
1102                         pRenderData->iBindingTable,
1103                         (i==0)?ALIGN16_TRG_Y_INDEX:((i==1)?ALIGN16_TRG_V_INDEX:ALIGN16_TRG_U_INDEX),
1104                         bSrc?false:true));
1105                     // add U, V offset which was missed in raw buffer common configuration.
1106                     // recalculate U, V offset based on 16aligned pitch.
1107                     if (i > 0)
1108                     {
1109                         pSurfaceEntry   = &pRenderHal->pStateHeap->pSurfaceEntry[pRenderHal->pStateHeap->iCurrentSurfaceState-1]; // fetch the surface plane
1110                         pSurfaceEntry->SurfaceToken.DW2.SurfaceOffset = (i == 1)?(dwSize*5/4):dwSize;
1111                     }
1112                 }
1113                 break;
1114             default:
1115                 VPHAL_RENDER_ASSERTMESSAGE("16 align output format doesn't support.");
1116                 eStatus = MOS_STATUS_INVALID_PARAMETER;
1117                 break;
1118         }
1119         // resotre the target format and width for curbe data.
1120         pSurface->Format      = format;
1121         pSurface->dwWidth     = width;
1122     }
1123     else
1124     {
1125         // input source keep using 2D surface foramt. set tile mode as linear.
1126         // VA 2D surface
1127         VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
1128             pRenderHal,
1129             pSurface,
1130             pRenderSurface,
1131             pSurfaceParams,
1132             pRenderData->iBindingTable,
1133             bSrc?ALIGN16_SRC_INDEX:ALIGN16_TRG_INDEX,
1134             bSrc?false:true));
1135         // for 1 sampler access YV12 3plane input, Y plane should use the R8 sampler type, the same as U,V plane
1136         // for 3 samplers access YV12 3plane input, Y plane should use Y8 sampler type
1137         // 16-alignment kernel always uses 1-sampler, legacy FC kernel always uses 3-sampler
1138         if (pSurface->Format == Format_YV12)
1139         {
1140             uint32_t * pSrcPlaneYSampler  = nullptr;
1141             pSurfaceEntry       = &pRenderHal->pStateHeap->pSurfaceEntry[0];   // input Y plane
1142             pSrcPlaneYSampler   = (uint32_t*)pSurfaceEntry->pSurfaceState + 2; // DW2
1143             *pSrcPlaneYSampler  = (*pSrcPlaneYSampler & 0x07FFFFFF) | (0x0B<<27);
1144             if (pSurface->b16UsrPtr)
1145             {
1146                 // correct the input surface index, from YVU to YUV.
1147                 pSurfaceEntry   = &pRenderHal->pStateHeap->pSurfaceEntry[1];
1148                 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnBindSurfaceState(pRenderHal, pRenderData->iBindingTable,
1149                     ALIGN16_SRC_V_INDEX, pSurfaceEntry));
1150                 pSurfaceEntry   = &pRenderHal->pStateHeap->pSurfaceEntry[2];
1151                 VPHAL_RENDER_CHK_STATUS(pRenderHal->pfnBindSurfaceState(pRenderHal, pRenderData->iBindingTable,
1152                     ALIGN16_SRC_U_INDEX, pSurfaceEntry));
1153             }
1154         }
1155         if (bSrc)
1156         {
1157             pSurfaceEntry              = &pRenderHal->pStateHeap->pSurfaceEntry[0];
1158             pRenderData->dwSurfStateHt = pSurfaceEntry->dwHeight;
1159             pRenderData->dwSurfStateWd = pSurfaceEntry->dwWidth;
1160         }
1161     }
1162 finish:
1163     VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1164     return eStatus;
1165 }
1166 
1167 //!
1168 //! \brief    16alignment setup surface states
1169 //! \details  Setup surface states for 16Align
1170 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
1171 //!           [in] Pointer to the 16Align State
1172 //! \param    PVPHAL_16_ALIGN_RENDER_DATA pRenderData
1173 //!           [in] Pointer to 16Align render data
1174 //! \return   MOS_STATUS
1175 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
1176 //!
VpHal_16AlignSetupSurfaceStates(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_16_ALIGN_RENDER_DATA pRenderData)1177 MOS_STATUS VpHal_16AlignSetupSurfaceStates(
1178     PVPHAL_16_ALIGN_STATE        p16AlignState,
1179     PVPHAL_16_ALIGN_RENDER_DATA  pRenderData)
1180 {
1181     PRENDERHAL_INTERFACE            pRenderHal;
1182     RENDERHAL_SURFACE_STATE_PARAMS  SurfaceParams;
1183     MOS_STATUS                      eStatus;
1184     PRENDERHAL_SURFACE_STATE_ENTRY  pSurfaceEntry;
1185 
1186     eStatus             = MOS_STATUS_SUCCESS;
1187     pRenderHal          = p16AlignState->pRenderHal;
1188 
1189     // Source surface
1190     MOS_ZeroMemory(&SurfaceParams, sizeof(SurfaceParams));
1191 
1192     if (pRenderData->ScalingRatio_H < 0.0625f ||
1193         pRenderData->ScalingRatio_V < 0.0625f)
1194     {
1195         SurfaceParams.bAVS          = false;
1196     }
1197     else
1198     {
1199         SurfaceParams.bAVS          = true;
1200     }
1201     SurfaceParams.Boundary          = RENDERHAL_SS_BOUNDARY_SRCRECT;
1202     SurfaceParams.isOutput     = false;
1203     SurfaceParams.MemObjCtl         =
1204         p16AlignState->SurfMemObjCtl.SourceSurfMemObjCtl;
1205     SurfaceParams.Type              = RENDERHAL_SURFACE_TYPE_ADV_G9;
1206     SurfaceParams.bWidthInDword_Y   = false;
1207     SurfaceParams.bWidthInDword_UV  = false;
1208     SurfaceParams.bWidth16Align     = false;
1209 
1210     VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetupSurfaceStatesInt(true,
1211         pRenderHal,
1212         p16AlignState->pSource,
1213         &p16AlignState->RenderHalSource,
1214         &SurfaceParams,
1215         pRenderData));
1216 
1217     // Target surface
1218     SurfaceParams.MemObjCtl         =
1219         p16AlignState->SurfMemObjCtl.TargetSurfMemObjCtl;
1220     SurfaceParams.Type              = pRenderHal->SurfaceTypeDefault;
1221     SurfaceParams.isOutput     = true;
1222     SurfaceParams.bAVS              = false;
1223     SurfaceParams.Boundary          = RENDERHAL_SS_BOUNDARY_DSTRECT;
1224 
1225     VPHAL_RENDER_CHK_STATUS(VpHal_16AlignSetupSurfaceStatesInt(false,
1226         pRenderHal,
1227         p16AlignState->pTarget,
1228         &p16AlignState->RenderHalTarget,
1229         &SurfaceParams,
1230         pRenderData));
1231 
1232 finish:
1233     VPHAL_RENDER_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1234     return eStatus;
1235 }
1236 
1237 //!
1238 //! \brief    16Align interface Initializations
1239 //! \details  Interface Initializations for 16Align
1240 //! \param    PVPHAL_16_ALIGN_STATE p16AlignState
1241 //!           [in] Pointer to the 16Align State
1242 //! \param    PRENDERHAL_INTERFACE pRenderHal
1243 //!           [in/out] Pointer to RenderHal Interface Structure
1244 //! \return   MOS_STATUS
1245 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
1246 //!
VpHal_16AlignInitInterface(PVPHAL_16_ALIGN_STATE p16AlignState,PRENDERHAL_INTERFACE pRenderHal)1247 MOS_STATUS VpHal_16AlignInitInterface(
1248     PVPHAL_16_ALIGN_STATE    p16AlignState,
1249     PRENDERHAL_INTERFACE        pRenderHal)
1250 {
1251     PMOS_INTERFACE                  pOsInterface;
1252     MOS_STATUS                      eStatus;
1253 
1254     eStatus      = MOS_STATUS_SUCCESS;
1255     pOsInterface = pRenderHal->pOsInterface;
1256 
1257     // Connect renderer to other VPHAL components (HW/OS interfaces)
1258     p16AlignState->pRenderHal      = pRenderHal;
1259     p16AlignState->pOsInterface    = pOsInterface;
1260     p16AlignState->pSkuTable       = pRenderHal->pSkuTable;
1261 
1262     // Setup functions
1263     p16AlignState->pfnInitialize         = VpHal_16AlignInitialize;
1264     p16AlignState->pfnDestroy            = VpHal_16AlignDestroy;
1265     p16AlignState->pfnRender             = VpHal_16AlignRender;
1266     p16AlignState->pfnSetupSurfaceStates = VpHal_16AlignSetupSurfaceStates;
1267 
1268     // States
1269     p16AlignState->bFtrMediaWalker       =
1270         p16AlignState->pRenderHal->pfnGetMediaWalkerStatus(p16AlignState->pRenderHal) ? true : false;
1271 
1272     p16AlignState->pfnLoadStaticData     = VpHal_16AlignLoadStaticData;
1273     p16AlignState->pfnSetupKernel        = VpHal_16AlignSetupKernel;
1274     p16AlignState->pfnSetSamplerStates   = VpHal_16AlignSetSamplerStates;
1275 
1276     return eStatus;
1277 }
1278 
1279 //!
1280 //! \brief    check 16 bytes alignment whether can be processed
1281 //! \details  check 16 bytes alignment whether can be processed
1282 //! \param    PVPHAL_RENDER_PARAMS  pRenderParams
1283 //!           [in] Pointer to VPHAL render parameter
1284 //! \return   bool
1285 //!           Return true if 16 bytes alignment can be processed, otherwise false
1286 //!
VpHal_RndrIs16Align(PVPHAL_16_ALIGN_STATE p16AlignState,PVPHAL_RENDER_PARAMS pRenderParams)1287 bool VpHal_RndrIs16Align(
1288     PVPHAL_16_ALIGN_STATE   p16AlignState,
1289     PVPHAL_RENDER_PARAMS    pRenderParams)
1290 {
1291     PVPHAL_SURFACE  pSource;
1292     PVPHAL_SURFACE  pTarget;
1293     bool            b16alignment = false;
1294 
1295     pSource = pRenderParams->pSrc[0];
1296     pTarget = pRenderParams->pTarget[0];
1297 
1298     if (!GFX_IS_RENDERCORE(p16AlignState->pRenderHal->Platform, IGFX_GEN9_CORE))
1299     {
1300         VPHAL_RENDER_ASSERTMESSAGE("Invalid 16UserPtr platforms!");
1301         return false;
1302     }
1303 
1304     if (pRenderParams->uSrcCount == 1                           &&
1305         pRenderParams->uDstCount == 1                           &&
1306         pRenderParams->pConstriction == nullptr                 &&
1307         (pSource->pBlendingParams == nullptr                    ||
1308          (pSource->pBlendingParams != nullptr                   &&
1309           pSource->pBlendingParams->BlendType == BLEND_NONE))   &&
1310         pSource->pLumaKeyParams == nullptr                      &&
1311         pSource->pProcampParams == nullptr                      &&
1312         pSource->pIEFParams == nullptr                          &&
1313         pSource->bInterlacedScaling == false                    &&
1314         pSource->bFieldWeaving == false                         &&
1315         pSource->pDenoiseParams == nullptr                      &&
1316         pSource->pColorPipeParams == nullptr                    &&
1317         !(pSource->pDeinterlaceParams                           &&
1318           pSource->pDeinterlaceParams->DIMode == DI_MODE_BOB))
1319     {
1320         b16alignment = ((pSource->Format == Format_NV12         ||
1321                          pSource->Format == Format_YUY2         ||
1322                          pSource->Format == Format_YV12)        &&
1323                         (pTarget->Format == Format_NV12         ||
1324                          pTarget->Format == Format_YUY2         ||
1325                          pTarget->Format == Format_YV12         ||
1326                          pTarget->Format == Format_A8R8G8B8));
1327         if (pSource->b16UsrPtr && pSource->TileType != MOS_TILE_LINEAR)
1328         {
1329             b16alignment = false;
1330         }
1331 
1332     }
1333     VPHAL_RENDER_NORMALMESSAGE("%s support(s) %s %s %s surface convert to %s %s surface",
1334         b16alignment?"16UsrPtr":"16UsrPtr doesn't",
1335         (pSource->TileType == MOS_TILE_LINEAR)?"":"non",
1336         pSource->b16UsrPtr?"16 bytes aligned linear":"2D", VphalDumperTool::GetFormatStr(pSource->Format),
1337         pTarget->b16UsrPtr?"16 bytes aligned linear":"2D", VphalDumperTool::GetFormatStr(pTarget->Format));
1338 
1339     return b16alignment;
1340 }
1341