1 /*
2 * Copyright (c) 2017-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_csc_ds.cpp
24 //! \brief    Defines base class for CSC and Downscaling
25 //!
26 
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_csc_ds.h"
29 #include "hal_oca_interface.h"
30 
AllocateSurfaceCsc()31 MOS_STATUS CodechalEncodeCscDs::AllocateSurfaceCsc()
32 {
33     CODECHAL_ENCODE_FUNCTION_ENTER;
34 
35     if (!m_cscFlag)
36     {
37         return MOS_STATUS_SUCCESS;
38     }
39 
40     return m_encoder->m_trackedBuf->AllocateSurfaceCsc();
41 }
42 
AllocateSurfaceCopy(MOS_FORMAT format)43 MOS_STATUS CodechalEncodeCscDs::AllocateSurfaceCopy(MOS_FORMAT format)
44 {
45     CODECHAL_ENCODE_FUNCTION_ENTER;
46 
47     if (!m_cscFlag)
48     {
49         return MOS_STATUS_SUCCESS;
50     }
51 
52     return m_encoder->m_trackedBuf->AllocateSurfaceCopy(format, m_rawSurfaceToEnc->OsResource.pGmmResInfo->GetSetCpSurfTag(false, 0));
53 }
54 
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)55 MOS_STATUS CodechalEncodeCscDs::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
56 {
57     CODECHAL_ENCODE_FUNCTION_ENTER;
58 
59     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
60 
61     // check input color format, and set target traverse thread space size
62     switch (format)
63     {
64     case Format_NV12:
65         m_colorRawSurface = cscColorNv12Linear;
66         m_cscRequireColor = 1;
67         m_threadTraverseSizeX = 5;    // for NV12, thread space is 32x4
68         break;
69     case Format_YUY2:
70     case Format_YUYV:
71         m_colorRawSurface = cscColorYUY2;
72         m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
73         m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
74         m_threadTraverseSizeX = 4;    // for YUY2, thread space is 16x4
75         break;
76     case Format_A8R8G8B8:
77         m_colorRawSurface = cscColorARGB;
78         m_cscRequireColor = 1;
79         m_cscUsingSfc = m_cscEnableSfc ? 1 : 0;
80         // Use EU for better performance in big resolution cases or TU1
81         if ((m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088) || m_16xMeSupported)
82         {
83             m_cscUsingSfc = 0;
84         }
85         m_threadTraverseSizeX = 3;    // for ARGB, thread space is 8x4
86         break;
87     case Format_A8B8G8R8:
88         m_colorRawSurface = cscColorABGR;
89         m_cscRequireColor = 1;
90         m_cscUsingSfc = m_cscEnableSfc ? 1 : 0;
91         // Use EU for better performance in big resolution cases or TU1
92         if ((m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088) || m_16xMeSupported)
93         {
94             m_cscUsingSfc = 0;
95         }
96         m_threadTraverseSizeX = 3;    // for ABGR, thread space is 8x4
97         break;
98     case Format_P010:
99         m_colorRawSurface = cscColorP010;
100         m_cscRequireConvTo8bPlanar = 1;
101         break;
102     default:
103         CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not supported!", format);
104         eStatus = MOS_STATUS_INVALID_PARAMETER;
105         break;
106     }
107 
108     return eStatus;
109 }
110 
InitSfcState()111 MOS_STATUS CodechalEncodeCscDs::InitSfcState()
112 {
113     CODECHAL_ENCODE_FUNCTION_ENTER;
114 
115     if (!m_sfcState)
116     {
117         m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfc);
118         CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
119 
120         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
121 
122         m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
123     }
124 
125     return MOS_STATUS_SUCCESS;
126 }
127 
SetParamsSfc(CODECHAL_ENCODE_SFC_PARAMS * sfcParams)128 MOS_STATUS CodechalEncodeCscDs::SetParamsSfc(CODECHAL_ENCODE_SFC_PARAMS* sfcParams)
129 {
130     CODECHAL_ENCODE_FUNCTION_ENTER;
131 
132     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
133 
134     CODECHAL_ENCODE_CHK_NULL_RETURN(sfcParams);
135 
136     // color space parameters have been set to pSfcState already, no need set here
137     sfcParams->pInputSurface = m_rawSurfaceToEnc;
138     sfcParams->pOutputSurface = m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER);
139     sfcParams->rcInputSurfaceRegion.X = 0;
140     sfcParams->rcInputSurfaceRegion.Y = 0;
141     sfcParams->rcInputSurfaceRegion.Width = m_cscRawSurfWidth;
142     sfcParams->rcInputSurfaceRegion.Height = m_cscRawSurfHeight;
143 
144     sfcParams->rcOutputSurfaceRegion.X = 0;
145     sfcParams->rcOutputSurfaceRegion.Y = 0;
146     sfcParams->rcOutputSurfaceRegion.Width = m_cscRawSurfWidth;
147     sfcParams->rcOutputSurfaceRegion.Height = m_cscRawSurfHeight;
148 
149     sfcParams->uiChromaSitingType = MHW_CHROMA_SITING_HORZ_CENTER | MHW_CHROMA_SITING_VERT_CENTER;
150 
151     return eStatus;
152 }
153 
InitKernelStateCsc()154 MOS_STATUS CodechalEncodeCscDs::InitKernelStateCsc()
155 {
156     CODECHAL_ENCODE_FUNCTION_ENTER;
157 
158     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
159 
160     auto kernelHeaderTable = (CscKernelHeader*)m_kernelBase;
161     auto currKrnHeader = kernelHeaderTable->header;
162 
163     m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
164     m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
165     m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
166     m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
167     m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
168     m_cscKernelState->KernelParams.iIdCount = 1;
169     m_cscKernelState->KernelParams.iInlineDataLength = 0;
170     m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
171     m_cscKernelState->KernelParams.pBinary =
172         m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
173     m_cscKernelState->KernelParams.iSize = m_combinedKernelSize - (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
174 
175     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
176         m_cscKernelState->KernelParams.iBTCount,
177         &m_cscKernelState->dwSshSize,
178         &m_cscKernelState->dwBindingTableSize));
179 
180     CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
181     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
182 
183     return eStatus;
184 }
185 
SetKernelParamsCsc(KernelParams * params)186 MOS_STATUS CodechalEncodeCscDs::SetKernelParamsCsc(KernelParams* params)
187 {
188     CODECHAL_ENCODE_FUNCTION_ENTER;
189 
190     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
191 
192     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
193 
194     /* calling mode for Ds+Copy kernel and/or 4x DS kernel
195     *
196     * For Progressive:
197     * ------------------------------------------------------------------------------------------------
198     *  bScalingEnabled  cscReqdForRawSurf  bFirstField     call Ds+Copy kernel?    call 4x DS kernel?
199     * ------------------------------------------------------------------------------------------------
200     *        1                  0               1                                        Yes
201     *        1                  1               1             COPY_DS mode
202     *        0                  0               1
203     *        0                  1               1             COPY_ONLY mode
204     *
205     * For Interlaced:
206     *        1                  0               1                                        Yes
207     *        1                  1               1             COPY_ONLY mode             Yes, see note 2
208     *        0                  0           dont care
209     *        0                  1               1             COPY_ONLY mode
210     *        0                  1               0             do nothing for 2nd field
211     *
212     * Note 1: bFirstField must be == 1 when (1) bScalingEnabled == 1, or (2) Progressive content
213     * Note 2: so far Ds+Copy kernel does not support Interlaced, so we would have to do a COPY_ONLY, followed by 4x DS
214     *         these 2 steps can combine into a single COPY_DS once Interlaced is supported
215     */
216 
217     m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
218     m_currRefList->b4xScalingUsed = m_scalingEnabled;
219 
220     // setup Curbe
221     m_curbeParams.dwInputPictureWidth = m_cscRawSurfWidth;
222     m_curbeParams.dwInputPictureHeight = m_cscRawSurfHeight;
223     m_curbeParams.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
224     m_curbeParams.bMBVarianceOutputEnabled = m_mbStatsEnabled;
225     m_curbeParams.bMBPixelAverageOutputEnabled = m_mbStatsEnabled;
226     m_curbeParams.bCscOrCopyOnly = !m_scalingEnabled || params->cscOrCopyOnly;
227     m_curbeParams.inputColorSpace = params->inputColorSpace;
228 
229     // setup surface states
230     m_surfaceParamsCsc.psInputSurface = m_rawSurfaceToEnc;
231     m_surfaceParamsCsc.psOutputCopiedSurface = m_cscFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
232     m_surfaceParamsCsc.psOutput4xDsSurface =
233         !m_curbeParams.bCscOrCopyOnly ? m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
234 
235     if (m_mbStatsSupported)
236     {
237         m_surfaceParamsCsc.bMBVProcStatsEnabled = true;
238         m_surfaceParamsCsc.presMBVProcStatsBuffer = &m_resMbStatsBuffer;
239     }
240     else
241     {
242         m_surfaceParamsCsc.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
243         m_surfaceParamsCsc.psFlatnessCheckSurface = &m_encoder->m_flatnessCheckSurface;
244     }
245 
246     // setup walker param
247     m_walkerResolutionX = MOS_ROUNDUP_SHIFT(m_downscaledWidth4x, m_threadTraverseSizeX);
248     m_walkerResolutionY = MOS_ROUNDUP_SHIFT(m_downscaledHeight4x, m_threadTraverseSizeY);
249 
250     return eStatus;
251 }
252 
SetCurbeCsc()253 MOS_STATUS CodechalEncodeCscDs::SetCurbeCsc()
254 {
255     CODECHAL_ENCODE_FUNCTION_ENTER;
256 
257     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
258 
259     CscKernelCurbeData curbe;
260 
261     curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
262     curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
263 
264     curbe.DW1_SrcNV12SurfYIndex = cscSrcYPlane;
265     curbe.DW2_DstYSurfIndex = cscDstDsYPlane;
266     curbe.DW3_FlatDstSurfIndex = cscDstFlatOrMbStats;
267     curbe.DW4_CopyDstNV12SurfIndex = cscDstCopyYPlane;
268 
269     if (m_curbeParams.bCscOrCopyOnly)
270     {
271         curbe.DW5_CscDsCopyOpCode = 0;    // Copy only
272     }
273     else
274     {
275         // Enable DS kernel (0  disable, 1  enable)
276         curbe.DW5_CscDsCopyOpCode = 1;    // 0x01 to 0x7F: DS + Copy
277     }
278 
279     if (cscColorNv12TileY == m_colorRawSurface ||
280         cscColorNv12Linear == m_colorRawSurface)
281     {
282         curbe.DW5_InputColorFormat = 0;
283     }
284     else if (cscColorYUY2 == m_colorRawSurface)
285     {
286         curbe.DW5_InputColorFormat = 1;
287     }
288     else if (cscColorARGB == m_colorRawSurface)
289     {
290         curbe.DW5_InputColorFormat = 2;
291     }
292 
293     if (m_curbeParams.bFlatnessCheckEnabled ||
294         m_curbeParams.bMBVarianceOutputEnabled ||
295         m_curbeParams.bMBPixelAverageOutputEnabled)
296     {
297         curbe.DW6_FlatnessThreshold = 128;
298         curbe.DW7_EnableMBFlatnessCheck = true;
299     }
300     else
301     {
302         curbe.DW7_EnableMBFlatnessCheck = false;
303     }
304 
305     curbe.DW8_SrcNV12SurfUVIndex = cscSrcUVPlane;
306 
307     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
308         &curbe,
309         m_cscKernelState->dwCurbeOffset,
310         sizeof(curbe)));
311 
312     return eStatus;
313 }
314 
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)315 MOS_STATUS CodechalEncodeCscDs::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
316 {
317     CODECHAL_ENCODE_FUNCTION_ENTER;
318 
319     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
320 
321     // Source surface/s
322     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
323     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
324     surfaceParams.bIs2DSurface = true; // linear surface is not 2D -> changed kernel
325     surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
326         cscColorNv12Linear == m_colorRawSurface);
327     surfaceParams.bMediaBlockRW = true;
328     surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
329     surfaceParams.bUseARGB8Format = true;
330     surfaceParams.dwCacheabilityControl =
331         m_hwInterface->ComposeSurfaceCacheabilityControl(
332             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
333             (codechalL3 | codechalLLC));
334 
335     surfaceParams.dwVerticalLineStride = m_verticalLineStride;
336     surfaceParams.dwBindingTableOffset = cscSrcYPlane;
337     surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
338     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
339         m_hwInterface,
340         cmdBuffer,
341         &surfaceParams,
342         m_cscKernelState));
343 
344     // Destination surface/s - 4x downscaled surface
345     if (m_surfaceParamsCsc.psOutput4xDsSurface)
346     {
347         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
348         surfaceParams.bIs2DSurface =
349         surfaceParams.bIsWritable = true;
350         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
351         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
352             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
353             codechalLLC);
354         surfaceParams.dwVerticalLineStride = m_verticalLineStride;
355         surfaceParams.dwBindingTableOffset = cscDstDsYPlane;
356         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
357             m_hwInterface,
358             cmdBuffer,
359             &surfaceParams,
360             m_cscKernelState));
361     }
362 
363     // FlatnessCheck or MbStats surface
364     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
365     if (m_surfaceParamsCsc.bMBVProcStatsEnabled)
366     {
367         surfaceParams.bRawSurface =
368         surfaceParams.bIsWritable = true;
369         surfaceParams.dwSize = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_surfaceParamsCsc.psInputSurface->dwWidth) *
370             CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_surfaceParamsCsc.psInputSurface->dwHeight) * 16 * sizeof(uint32_t);
371         surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
372         surfaceParams.dwCacheabilityControl =
373             m_hwInterface->ComposeSurfaceCacheabilityControl(
374                 MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE,
375                 codechalLLC | codechalL3);
376         surfaceParams.dwBindingTableOffset = cscDstFlatOrMbStats;
377         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
378             m_hwInterface,
379             cmdBuffer,
380             &surfaceParams,
381             m_cscKernelState));
382     }
383     else if (m_surfaceParamsCsc.bFlatnessCheckEnabled)
384     {
385         surfaceParams.bIs2DSurface =
386         surfaceParams.bMediaBlockRW =
387         surfaceParams.bIsWritable = true;
388         surfaceParams.psSurface = m_surfaceParamsCsc.psFlatnessCheckSurface;
389         surfaceParams.dwCacheabilityControl =
390             m_hwInterface->ComposeSurfaceCacheabilityControl(
391                 MOS_CODEC_RESOURCE_USAGE_SURFACE_FLATNESS_CHECK_ENCODE,
392                 codechalLLC | codechalL3);
393         surfaceParams.dwBindingTableOffset = cscDstFlatOrMbStats;
394         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
395             m_hwInterface,
396             cmdBuffer,
397             &surfaceParams,
398             m_cscKernelState));
399     }
400 
401     // copy kernel output luma + chroma
402     if (m_surfaceParamsCsc.psOutputCopiedSurface)
403     {
404         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
405         surfaceParams.bIs2DSurface =
406         surfaceParams.bUseUVPlane =
407         surfaceParams.bMediaBlockRW =
408         surfaceParams.bIsWritable = true;
409         surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
410         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
411             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
412             codechalLLC);
413         surfaceParams.dwBindingTableOffset = cscDstCopyYPlane;
414         surfaceParams.dwUVBindingTableOffset = cscDstCopyUVPlane;
415         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
416             m_hwInterface,
417             cmdBuffer,
418             &surfaceParams,
419             m_cscKernelState));
420     }
421 
422     return eStatus;
423 }
424 
SetSurfacesToEncPak()425 MOS_STATUS CodechalEncodeCscDs::SetSurfacesToEncPak()
426 {
427     CODECHAL_ENCODE_FUNCTION_ENTER;
428 
429     auto cscSurface = m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER);
430 
431     // assign CSC output surface according to different operation
432     if (RenderConsumesCscSurface())
433     {
434         m_rawSurfaceToEnc = cscSurface;
435 
436         // update the RawBuffer and RefBuffer (if Raw is used as Ref)
437         m_currRefList->sRefRawBuffer = *cscSurface;
438         if (m_useRawForRef)
439         {
440             m_currRefList->sRefBuffer = *cscSurface;
441         }
442         CODECHAL_ENCODE_NORMALMESSAGE("Set m_rawSurfaceToEnc %d x %d",
443             m_rawSurfaceToEnc->dwWidth, m_rawSurfaceToEnc->dwHeight);
444     }
445 
446     if (VdboxConsumesCscSurface())
447     {
448         m_rawSurfaceToPak = cscSurface;
449         CODECHAL_ENCODE_NORMALMESSAGE("Set m_rawSurfaceToPak %d x %d",
450             m_rawSurfaceToPak->dwWidth, m_rawSurfaceToPak->dwHeight);
451     }
452 
453     // dump copied surface from Ds+Copy kernel
454     if (m_cscFlag)
455     {
456         CODECHAL_DEBUG_TOOL(
457             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
458                 cscSurface,
459                 CodechalDbgAttr::attrEncodeRawInputSurface,
460                 "Copied_SrcSurf")); // needs to consider YUV420
461             if (m_cscUsingSfc)
462             {
463                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->DumpBuffers(m_debugInterface));
464             }
465         )
466     }
467 
468     return MOS_STATUS_SUCCESS;
469 }
470 
InitKernelStateDS()471 MOS_STATUS CodechalEncodeCscDs::InitKernelStateDS()
472 {
473     CODECHAL_ENCODE_FUNCTION_ENTER;
474 
475     uint32_t kernelSize, combinedKernelSize, numKernelsToLoad;
476 
477     numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
478 
479     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
480         m_encoder->m_kernelBase,
481         m_encoder->m_kuid,
482         &m_dsKernelBase,
483         &combinedKernelSize));
484 
485     CODECHAL_KERNEL_HEADER currKrnHeader;
486     for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
487     {
488         kernelSize = combinedKernelSize;
489 
490         m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
491 
492         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->pfnGetKernelHeaderAndSize(
493             m_dsKernelBase,
494             ENC_SCALING4X,
495             krnStateIdx,
496             &currKrnHeader,
497             &kernelSize))
498 
499         m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
500         m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
501         m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
502         m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
503         m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
504         m_dsKernelState->KernelParams.iIdCount = 1;
505         m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
506 
507         m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
508         m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
509         m_dsKernelState->KernelParams.iSize = kernelSize;
510         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
511             m_dsKernelState->KernelParams.iBTCount,
512             &m_dsKernelState->dwSshSize,
513             &m_dsKernelState->dwBindingTableSize));
514 
515         CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
516         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
517 
518         if (m_32xMeSupported)
519         {
520             m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
521 
522             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->pfnGetKernelHeaderAndSize(
523                 m_dsKernelBase,
524                 ENC_SCALING2X,
525                 krnStateIdx,
526                 &currKrnHeader,
527                 &kernelSize))
528 
529             m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
530             m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
531             m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
532             m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
533             m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
534 
535             m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
536             m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
537             m_dsKernelState->KernelParams.iSize = kernelSize;
538             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
539                 m_dsKernelState->KernelParams.iBTCount,
540                 &m_dsKernelState->dwSshSize,
541                 &m_dsKernelState->dwBindingTableSize));
542 
543             CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
544             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
545         }
546 
547         if (m_encoder->m_interlacedFieldDisabled)
548         {
549             m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
550 
551             if (m_32xMeSupported)
552             {
553                 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
554             }
555         }
556     }
557 
558     return MOS_STATUS_SUCCESS;
559 }
560 
SetCurbeDS4x()561 MOS_STATUS CodechalEncodeCscDs::SetCurbeDS4x()
562 {
563     CODECHAL_ENCODE_FUNCTION_ENTER;
564 
565     Ds4xKernelCurbeData curbe;
566 
567     curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
568     curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
569 
570     curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
571     curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
572 
573     if (m_curbeParams.bFieldPicture)
574     {
575         curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
576         curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
577     }
578 
579     if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
580     {
581         curbe.DW5_FlatnessThreshold = 128;
582         curbe.DW8_FlatnessOutputBTIFrame = ds4xDstFlatness;
583 
584         if (m_curbeParams.bFieldPicture)
585         {
586             curbe.DW9_FlatnessOutputBTIBottomField = ds4xDstFlatnessBtmField;
587         }
588     }
589 
590     curbe.DW6_EnableMBVarianceOutput = m_curbeParams.bMBVarianceOutputEnabled;
591     curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
592     curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
593 
594     if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
595     {
596         curbe.DW10_MBVProcStatsBTIFrame = ds4xDstMbVProc;
597 
598         if (m_curbeParams.bFieldPicture)
599         {
600             curbe.DW11_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
601         }
602     }
603 
604     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
605         &curbe,
606         m_dsKernelState->dwCurbeOffset,
607         sizeof(curbe)));
608 
609     return MOS_STATUS_SUCCESS;
610 }
611 
SetCurbeDS2x()612 MOS_STATUS CodechalEncodeCscDs::SetCurbeDS2x()
613 {
614     CODECHAL_ENCODE_FUNCTION_ENTER;
615 
616     Ds2xKernelCurbeData curbe;
617 
618     curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
619     curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
620 
621     curbe.DW8_InputYBTIFrame = ds2xSrcYPlane;
622     curbe.DW9_OutputYBTIFrame = ds2xDstYPlane;
623 
624     if (m_curbeParams.bFieldPicture)
625     {
626         curbe.DW10_InputYBTIBottomField = ds2xSrcYPlaneBtmField;
627         curbe.DW11_OutputYBTIBottomField = ds2xDstYPlaneBtmField;
628     }
629 
630     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
631         &curbe,
632         m_dsKernelState->dwCurbeOffset,
633         sizeof(curbe)));
634 
635     return MOS_STATUS_SUCCESS;
636 }
637 
SetSurfaceParamsDS(KernelParams * params)638 MOS_STATUS CodechalEncodeCscDs::SetSurfaceParamsDS(KernelParams* params)
639 {
640     CODECHAL_ENCODE_FUNCTION_ENTER;
641 
642     uint32_t scaleFactor, downscaledWidthInMb, downscaledHeightInMb;
643     uint32_t inputFrameWidth, inputFrameHeight, outputFrameWidth, outputFrameHeight;
644     uint32_t inputBottomFieldOffset, outputBottomFieldOffset;
645     PMOS_SURFACE inputSurface, outputSurface;
646     bool scaling4xInUse = !(params->b32xScalingInUse || params->b16xScalingInUse);
647     bool fieldPicture = CodecHal_PictureIsField(m_encoder->m_currOriginalPic);
648 
649     if (params->b32xScalingInUse)
650     {
651         scaleFactor = SCALE_FACTOR_32x;
652         downscaledWidthInMb = m_downscaledWidth32x / CODECHAL_MACROBLOCK_WIDTH;
653         downscaledHeightInMb = m_downscaledHeight32x / CODECHAL_MACROBLOCK_HEIGHT;
654         if (fieldPicture)
655         {
656             downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
657         }
658 
659         inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
660         inputFrameWidth = m_downscaledWidth16x;
661         inputFrameHeight = m_downscaledHeight16x;
662         inputBottomFieldOffset = m_scaled16xBottomFieldOffset;
663 
664         outputSurface = m_encoder->m_trackedBuf->Get32xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
665         outputFrameWidth = m_downscaledWidth32x;
666         outputFrameHeight = downscaledHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
667         outputBottomFieldOffset = m_scaled32xBottomFieldOffset;
668         m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
669         m_currRefList->b32xScalingUsed = true;
670     }
671     else if (params->b16xScalingInUse)
672     {
673         scaleFactor = SCALE_FACTOR_16x;
674         downscaledWidthInMb = m_downscaledWidth16x / CODECHAL_MACROBLOCK_WIDTH;
675         downscaledHeightInMb = m_downscaledHeight16x / CODECHAL_MACROBLOCK_HEIGHT;
676         if (fieldPicture)
677         {
678             downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
679         }
680 
681         inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
682         inputFrameWidth = m_downscaledWidth4x;
683         inputFrameHeight = m_downscaledHeight4x;
684         inputBottomFieldOffset = m_scaledBottomFieldOffset;
685 
686         outputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
687         outputFrameWidth = m_downscaledWidth16x;
688         outputFrameHeight = downscaledHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
689         outputBottomFieldOffset = m_scaled16xBottomFieldOffset;
690         m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
691         m_currRefList->b16xScalingUsed = true;
692     }
693     else
694     {
695         scaleFactor = SCALE_FACTOR_4x;
696         downscaledWidthInMb = m_downscaledWidth4x / CODECHAL_MACROBLOCK_WIDTH;
697         downscaledHeightInMb = m_downscaledHeight4x / CODECHAL_MACROBLOCK_HEIGHT;
698         if (fieldPicture)
699         {
700             downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
701         }
702 
703         inputSurface = (params->bRawInputProvided) ? &params->sInputRawSurface : m_rawSurfaceToEnc;
704         inputFrameWidth = m_encoder->m_oriFrameWidth;
705         inputFrameHeight = m_encoder->m_oriFrameHeight;
706         inputBottomFieldOffset = 0;
707 
708         outputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
709         outputFrameWidth = m_downscaledWidth4x;
710         outputFrameHeight = downscaledHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
711         outputBottomFieldOffset = m_scaledBottomFieldOffset;
712         m_lastTaskInPhase = params->bLastTaskInPhase4xDS;
713         m_currRefList->b4xScalingUsed = true;
714     }
715 
716     CODEC_PICTURE originalPic = (params->bRawInputProvided) ? params->inputPicture : m_encoder->m_currOriginalPic;
717     FeiPreEncParams *preEncParams = nullptr;
718     if (m_encoder->m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC)
719     {
720         preEncParams = (FeiPreEncParams*)m_encoder->m_encodeParams.pPreEncParams;
721         CODECHAL_ENCODE_CHK_NULL_RETURN(preEncParams);
722     }
723 
724     // setup surface states
725     m_surfaceParamsDS.bCurrPicIsFrame = !CodecHal_PictureIsField(originalPic);
726     m_surfaceParamsDS.psInputSurface = inputSurface;
727     m_surfaceParamsDS.dwInputFrameWidth = inputFrameWidth;
728     m_surfaceParamsDS.dwInputFrameHeight = inputFrameHeight;
729     m_surfaceParamsDS.psOutputSurface = outputSurface;
730     m_surfaceParamsDS.dwOutputFrameWidth = outputFrameWidth;
731     m_surfaceParamsDS.dwOutputFrameHeight = outputFrameHeight;
732     m_surfaceParamsDS.dwInputBottomFieldOffset = (uint32_t)inputBottomFieldOffset;
733     m_surfaceParamsDS.dwOutputBottomFieldOffset = (uint32_t)outputBottomFieldOffset;
734     m_surfaceParamsDS.bScalingOutUses16UnormSurfFmt = params->b32xScalingInUse;
735     m_surfaceParamsDS.bScalingOutUses32UnormSurfFmt = !params->b32xScalingInUse;
736 
737     if (preEncParams)
738     {
739         m_surfaceParamsDS.bPreEncInUse = true;
740         m_surfaceParamsDS.bEnable8x8Statistics = preEncParams->bEnable8x8Statistics;
741         if (params->bScalingforRef)
742         {
743             m_surfaceParamsDS.bMBVProcStatsEnabled = params->bStatsInputProvided;
744             m_surfaceParamsDS.presMBVProcStatsBuffer = (params->bStatsInputProvided) ? &(params->sInputStatsBuffer) : nullptr;
745             m_surfaceParamsDS.presMBVProcStatsBotFieldBuffer = (params->bStatsInputProvided) ? &(params->sInputStatsBotFieldBuffer) : nullptr;
746         }
747         else
748         {
749             m_surfaceParamsDS.bMBVProcStatsEnabled = !preEncParams->bDisableStatisticsOutput;
750             m_surfaceParamsDS.presMBVProcStatsBuffer = &(preEncParams->resStatsBuffer);
751             m_surfaceParamsDS.presMBVProcStatsBotFieldBuffer = &preEncParams->resStatsBotFieldBuffer;
752         }
753         m_surfaceParamsDS.dwMBVProcStatsBottomFieldOffset = m_mbVProcStatsBottomFieldOffset;
754     }
755     else if (m_mbStatsSupported)
756     {
757         //Currently Only Based on Flatness Check, later on Adaptive Transform Decision too
758         m_surfaceParamsDS.bMBVProcStatsEnabled = scaling4xInUse && (m_flatnessCheckEnabled || m_mbStatsEnabled);
759         m_surfaceParamsDS.presMBVProcStatsBuffer = &m_resMbStatsBuffer;
760         m_surfaceParamsDS.dwMBVProcStatsBottomFieldOffset = m_mbStatsBottomFieldOffset;
761 
762         m_surfaceParamsDS.bFlatnessCheckEnabled = false; // Disabling flatness check as its encompassed in Mb stats
763     }
764     else
765     {
766         // Enable flatness check only for 4x scaling.
767         m_surfaceParamsDS.bFlatnessCheckEnabled = scaling4xInUse && m_flatnessCheckEnabled;
768         m_surfaceParamsDS.psFlatnessCheckSurface = &m_encoder->m_flatnessCheckSurface;
769         m_surfaceParamsDS.dwFlatnessCheckBottomFieldOffset = m_flatnessCheckBottomFieldOffset;
770     }
771 
772     return MOS_STATUS_SUCCESS;
773 }
774 
SendSurfaceDS(PMOS_COMMAND_BUFFER cmdBuffer)775 MOS_STATUS CodechalEncodeCscDs::SendSurfaceDS(PMOS_COMMAND_BUFFER cmdBuffer)
776 {
777     CODECHAL_ENCODE_FUNCTION_ENTER;
778 
779     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
780 
781     auto currPicIsFrame = m_surfaceParamsDS.bCurrPicIsFrame;
782 
783     auto verticalLineStride = m_verticalLineStride;
784     auto verticalLineOffsetTop = CODECHAL_VLINESTRIDEOFFSET_TOP_FIELD;
785     auto verticalLineOffsetBottom = CODECHAL_VLINESTRIDEOFFSET_BOT_FIELD;
786 
787     auto originalSurface = *m_surfaceParamsDS.psInputSurface;
788     originalSurface.dwWidth = m_surfaceParamsDS.dwInputFrameWidth;
789     originalSurface.dwHeight = m_surfaceParamsDS.dwInputFrameHeight;
790 
791     // Use actual width and height for scaling source, not padded allocated dimensions
792     auto scaledSurface = m_surfaceParamsDS.psOutputSurface;
793     scaledSurface->dwWidth = m_surfaceParamsDS.dwOutputFrameWidth;
794     scaledSurface->dwHeight = m_surfaceParamsDS.dwOutputFrameHeight;
795 
796     // Account for field case
797     if (!m_fieldScalingOutputInterleaved)
798     {
799         verticalLineStride = verticalLineOffsetTop = verticalLineOffsetBottom = 0;
800         originalSurface.dwHeight =
801             MOS_ALIGN_CEIL((currPicIsFrame) ? originalSurface.dwHeight : originalSurface.dwHeight / 2, 16);
802         scaledSurface->dwHeight =
803             MOS_ALIGN_CEIL((currPicIsFrame) ? scaledSurface->dwHeight : scaledSurface->dwHeight / 2, 16);
804     }
805     originalSurface.UPlaneOffset.iYOffset = originalSurface.dwHeight;
806 
807     // Source surface/s
808     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
809     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
810     surfaceParams.bIs2DSurface = true;
811     surfaceParams.bMediaBlockRW = true;
812     if (m_surfaceParamsDS.bScalingOutUses16UnormSurfFmt)
813     {
814         // 32x scaling requires R16_UNROM
815         surfaceParams.bUse16UnormSurfaceFormat = true;
816     }
817     else
818     {
819         surfaceParams.bUse32UnormSurfaceFormat = true;
820     }
821     surfaceParams.psSurface = &originalSurface;
822     surfaceParams.dwCacheabilityControl =
823         m_hwInterface->ComposeSurfaceCacheabilityControl(
824             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
825             (codechalL3 | codechalLLC));
826     surfaceParams.dwVerticalLineStride = verticalLineStride;
827 
828     CODECHAL_ENCODE_CHK_NULL_RETURN(m_encoder->m_mmcState);
829     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_mmcState->SetSurfaceParams(&surfaceParams));
830 
831     if (currPicIsFrame)
832     {
833         // Frame
834         surfaceParams.dwBindingTableOffset = m_dsBTISrcY;
835         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
836             m_hwInterface,
837             cmdBuffer,
838             &surfaceParams,
839             m_dsKernelState));
840     }
841     else
842     {
843         // Top field
844         surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetTop;
845         surfaceParams.dwBindingTableOffset = m_dsBTISrcYTopField;
846         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
847             m_hwInterface,
848             cmdBuffer,
849             &surfaceParams,
850             m_dsKernelState));
851 
852         // Bot field
853         surfaceParams.dwOffset = m_surfaceParamsDS.dwInputBottomFieldOffset;
854         surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetBottom;
855         surfaceParams.dwBindingTableOffset = m_dsBTISrcYBtmField;
856         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
857             m_hwInterface,
858             cmdBuffer,
859             &surfaceParams,
860             m_dsKernelState));
861     }
862 
863     // Destination surface/s
864     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
865     surfaceParams.bIs2DSurface = true;
866     surfaceParams.bIsWritable = true;
867     surfaceParams.bRenderTarget = true;
868     surfaceParams.psSurface = scaledSurface;
869     if (m_surfaceParamsDS.bScalingOutUses32UnormSurfFmt)
870     {
871         surfaceParams.bMediaBlockRW = true;
872         surfaceParams.bUse32UnormSurfaceFormat = true;
873     }
874     else if (m_surfaceParamsDS.bScalingOutUses16UnormSurfFmt)
875     {
876         surfaceParams.bMediaBlockRW = true;
877         surfaceParams.bUse16UnormSurfaceFormat = true;
878     }
879     surfaceParams.dwCacheabilityControl =
880         m_hwInterface->ComposeSurfaceCacheabilityControl(
881             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE_DST,
882             codechalLLC);
883 
884     surfaceParams.dwVerticalLineStride = verticalLineStride;
885     surfaceParams.bRenderTarget = true;
886     surfaceParams.bIsWritable = true;
887 
888     if (currPicIsFrame)
889     {
890         // Frame
891         surfaceParams.dwBindingTableOffset = m_dsBTIDstY;
892         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
893             m_hwInterface,
894             cmdBuffer,
895             &surfaceParams,
896             m_dsKernelState));
897     }
898     else
899     {
900         // Top field
901         surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetTop;
902         surfaceParams.dwBindingTableOffset = m_dsBTIDstYTopField;
903         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
904             m_hwInterface,
905             cmdBuffer,
906             &surfaceParams,
907             m_dsKernelState));
908 
909         // Bot field
910         surfaceParams.dwOffset = m_surfaceParamsDS.dwOutputBottomFieldOffset;
911         surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetBottom;
912         surfaceParams.dwBindingTableOffset = m_dsBTIDstYBtmField;
913         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
914             m_hwInterface,
915             cmdBuffer,
916             &surfaceParams,
917             m_dsKernelState));
918     }
919 
920     if (m_surfaceParamsDS.bFlatnessCheckEnabled)
921     {
922         // flatness check surface
923         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
924         surfaceParams.bIs2DSurface = true;
925         surfaceParams.psSurface = m_surfaceParamsDS.psFlatnessCheckSurface;
926         surfaceParams.dwCacheabilityControl =
927             m_hwInterface->ComposeSurfaceCacheabilityControl(
928                 MOS_CODEC_RESOURCE_USAGE_SURFACE_FLATNESS_CHECK_ENCODE,
929                 codechalL3 | codechalLLC);
930         surfaceParams.bMediaBlockRW = true;
931         surfaceParams.dwVerticalLineStride = 0;
932         surfaceParams.bRenderTarget = true;
933         surfaceParams.bIsWritable = true;
934 
935         if (currPicIsFrame)
936         {
937             // Frame
938             surfaceParams.dwBindingTableOffset = m_dsBTIDstFlatness;
939             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
940                 m_hwInterface,
941                 cmdBuffer,
942                 &surfaceParams,
943                 m_dsKernelState));
944         }
945         else
946         {
947             // Top field
948             surfaceParams.bUseHalfHeight = true;
949             surfaceParams.dwVerticalLineStrideOffset = 0;
950             surfaceParams.dwBindingTableOffset = m_dsBTIDstFlatnessTopField;
951             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
952                 m_hwInterface,
953                 cmdBuffer,
954                 &surfaceParams,
955                 m_dsKernelState));
956 
957             // Bot field
958             surfaceParams.dwOffset = m_surfaceParamsDS.dwFlatnessCheckBottomFieldOffset;
959             surfaceParams.dwVerticalLineStrideOffset = 0;
960             surfaceParams.dwBindingTableOffset = m_dsBTIDstFlatnessBtmField;
961             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
962                 m_hwInterface,
963                 cmdBuffer,
964                 &surfaceParams,
965                 m_dsKernelState));
966         }
967     }
968 
969     if (m_surfaceParamsDS.bMBVProcStatsEnabled)
970     {
971         uint32_t size;
972         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
973         surfaceParams.presBuffer = m_surfaceParamsDS.presMBVProcStatsBuffer;
974         surfaceParams.dwCacheabilityControl =
975             m_hwInterface->ComposeSurfaceCacheabilityControl(
976                 MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE,
977                 codechalL3 | codechalLLC);
978         surfaceParams.bRenderTarget = true;
979         surfaceParams.bIsWritable = true;
980         surfaceParams.bRawSurface = true;
981 
982         if (currPicIsFrame)
983         {
984             if (m_surfaceParamsDS.bPreEncInUse)
985             {
986                 size = ((originalSurface.dwWidth + 15) / 16) * ((originalSurface.dwHeight + 15) / 16) * 16 * sizeof(uint32_t);
987             }
988             else
989             {
990                 size = ((originalSurface.dwWidth + 15) / 16) * 16 * sizeof(uint32_t) * ((originalSurface.dwHeight + 15) / 16);
991             }
992             surfaceParams.dwSize = size;
993             surfaceParams.dwBindingTableOffset = m_dsBTIDstMbVProc;
994             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
995                 m_hwInterface,
996                 cmdBuffer,
997                 &surfaceParams,
998                 m_dsKernelState));
999         }
1000         else
1001         {
1002             if (m_surfaceParamsDS.bPreEncInUse)
1003             {
1004                 size = ((originalSurface.dwWidth + 15) / 16) * ((originalSurface.dwHeight / 2 + 15) / 16) * 16 * sizeof(uint32_t);
1005             }
1006             else
1007             {
1008                 size = ((originalSurface.dwWidth + 15) / 16) * 16 * sizeof(uint32_t) * ((originalSurface.dwHeight / 2 + 15) / 16);
1009             }
1010             surfaceParams.dwSize = size;
1011 
1012             // Top field
1013             surfaceParams.dwBindingTableOffset = m_dsBTIDstMbVProcTopField;
1014             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1015                 m_hwInterface,
1016                 cmdBuffer,
1017                 &surfaceParams,
1018                 m_dsKernelState));
1019 
1020             // Bot field
1021             if (m_surfaceParamsDS.bPreEncInUse)
1022             {
1023                 surfaceParams.presBuffer = m_surfaceParamsDS.presMBVProcStatsBotFieldBuffer;
1024             }
1025             surfaceParams.dwOffset = m_surfaceParamsDS.dwMBVProcStatsBottomFieldOffset;
1026             surfaceParams.dwBindingTableOffset = m_dsBTIDstMbVProcBtmField;
1027             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1028                 m_hwInterface,
1029                 cmdBuffer,
1030                 &surfaceParams,
1031                 m_dsKernelState));
1032         }
1033     }
1034 
1035     return eStatus;
1036 }
1037 
GetBTCount() const1038 uint8_t CodechalEncodeCscDs::GetBTCount() const
1039 {
1040     return (uint8_t)cscNumSurfaces;
1041 }
1042 
GetCscAllocation(uint32_t & width,uint32_t & height,MOS_FORMAT & format)1043 void CodechalEncodeCscDs::GetCscAllocation(uint32_t &width, uint32_t &height, MOS_FORMAT &format)
1044 {
1045     uint32_t surfaceWidth, surfaceHeight;
1046     if (m_mode == CODECHAL_ENCODE_MODE_HEVC)
1047     {
1048         // The raw input surface to HEVC Enc should be 32 aligned because of VME hardware restriction as mentioned in DDI.
1049         surfaceWidth = MOS_ALIGN_CEIL(m_encoder->m_oriFrameWidth, 32);
1050         surfaceHeight = MOS_ALIGN_CEIL(m_encoder->m_oriFrameHeight, 32);
1051     }
1052     else
1053     {
1054         surfaceWidth = MOS_ALIGN_CEIL(m_encoder->m_frameWidth, m_rawSurfAlignment);
1055         surfaceHeight = MOS_ALIGN_CEIL(m_encoder->m_frameHeight, m_rawSurfAlignment);
1056     }
1057 
1058     if ( (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
1059     {
1060         //P208 is 422 8 bit planar with UV interleaved. It has the same memory layout as YUY2V
1061         format = Format_P208;
1062         width = surfaceWidth;
1063         height = surfaceHeight;
1064     }
1065     else
1066     {
1067         format = Format_NV12;
1068         width = surfaceWidth;
1069         height = surfaceHeight;
1070     }
1071 }
1072 
Initialize()1073 MOS_STATUS CodechalEncodeCscDs::Initialize()
1074 {
1075     CODECHAL_ENCODE_FUNCTION_ENTER;
1076 
1077     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1078 
1079     if (m_cscKernelUID)
1080     {
1081         uint8_t* binary;
1082         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
1083             m_kernelBase,
1084             m_cscKernelUID,
1085             &binary,
1086             &m_combinedKernelSize));
1087 
1088         CODECHAL_ENCODE_CHK_NULL_RETURN(m_kernelBase = binary);
1089 
1090         m_hwInterface->GetStateHeapSettings()->dwIshSize +=
1091             MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
1092     }
1093 
1094     return eStatus;
1095 }
1096 
CheckCondition()1097 MOS_STATUS CodechalEncodeCscDs::CheckCondition()
1098 {
1099     CODECHAL_ENCODE_FUNCTION_ENTER;
1100 
1101     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1102 
1103     MOS_SURFACE details;
1104     MOS_ZeroMemory(&details, sizeof(details));
1105     details.Format = Format_Invalid;
1106     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, &m_rawSurfaceToEnc->OsResource, &details));
1107 
1108     auto cscFlagPrev = m_cscFlag;
1109     m_cscFlag = 0;
1110     // Source surface width/height for CSC must be set using the lowest value between
1111     // SequenceParametersSet width/height and real surface width/height
1112     m_cscRawSurfWidth  = MOS_MIN(details.dwWidth,  m_encoder->m_frameWidth);
1113     m_cscRawSurfHeight = MOS_MIN(details.dwHeight, m_encoder->m_frameHeight);
1114     m_colorRawSurface = cscColorNv12TileY; // by default assume NV12 Tile-Y format
1115     m_threadTraverseSizeX = 5;
1116     m_threadTraverseSizeY = 2;    // for NV12, thread space is 32x4
1117 
1118     // check raw surface's color/tile format
1119     if (!m_encoder->CheckSupportedFormat(&details))
1120     {
1121         CODECHAL_ENCODE_CHK_COND_RETURN(!m_cscEnableColor && !m_cscEnableSfc, "Input color format = %d is not supported!", details.Format);
1122         CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckRawColorFormat(details.Format, details.TileType));
1123     }
1124 
1125     // check raw surface's alignment
1126     CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckRawSurfaceAlignment(details));
1127 
1128     // check raw surface's MMC state
1129     if (m_cscEnableMmc)
1130     {
1131         MOS_MEMCOMP_STATE mmcState;
1132 
1133         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetMemoryCompressionMode(
1134             m_osInterface, &m_rawSurfaceToEnc->OsResource, &mmcState));
1135 
1136         // Gen9 HEVC only: HCP on SKL does not support MMC surface, invoke Ds+Copy kernel to decompress MMC surface
1137         m_cscRequireMmc = (MOS_MEMCOMP_DISABLED != mmcState);
1138     }
1139 
1140     // CSC no longer required, free existing CSC surface
1141     if (cscFlagPrev && !m_cscFlag)
1142     {
1143         m_encoder->m_trackedBuf->ResizeCsc();
1144     }
1145 
1146     if (RequireCopyOnly())
1147     {
1148         CODECHAL_ENCODE_NORMALMESSAGE("raw surf = %d x %d, tile = %d, raw color format = %d, cscRequireCopy = %d",
1149             details.dwWidth,
1150             details.dwHeight,
1151             details.TileType,
1152             details.Format,
1153             m_cscRequireCopy);
1154     }
1155     else
1156     {
1157         CODECHAL_ENCODE_NORMALMESSAGE("raw surf = %d x %d, tile = %d, color = %d, cscFlag = %d",
1158             details.dwWidth,
1159             details.dwHeight,
1160             details.TileType,
1161             m_colorRawSurface,
1162             m_cscFlag);
1163     }
1164 
1165     return eStatus;
1166 }
1167 
CheckRawSurfaceAlignment(MOS_SURFACE surface)1168 MOS_STATUS CodechalEncodeCscDs::CheckRawSurfaceAlignment(MOS_SURFACE surface)
1169 {
1170     if (m_cscEnableCopy && (surface.dwWidth % m_rawSurfAlignment || surface.dwHeight % m_rawSurfAlignment))
1171     {
1172         m_cscRequireCopy = 1;
1173     }
1174     return MOS_STATUS_SUCCESS;
1175 }
1176 
CheckReconSurfaceAlignment(PMOS_SURFACE surface)1177 MOS_STATUS CodechalEncodeCscDs::CheckReconSurfaceAlignment(PMOS_SURFACE surface)
1178 {
1179     CODECHAL_ENCODE_FUNCTION_ENTER;
1180 
1181     uint8_t alignment;
1182     if (m_standard == CODECHAL_HEVC ||
1183         m_standard == CODECHAL_VP9)
1184     {
1185         alignment = m_hcpReconSurfAlignment;
1186     }
1187     else
1188     {
1189         alignment = m_mfxReconSurfAlignment;
1190     }
1191 
1192     MOS_SURFACE resDetails;
1193     MOS_ZeroMemory(&resDetails, sizeof(resDetails));
1194     resDetails.Format = Format_Invalid;
1195     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, &surface->OsResource, &resDetails));
1196 
1197     if (resDetails.dwHeight % alignment)
1198     {
1199         CODECHAL_ENCODE_ASSERTMESSAGE("Recon surface alignment does not meet HW requirement!");
1200         return MOS_STATUS_INVALID_PARAMETER;
1201     }
1202 
1203     return MOS_STATUS_SUCCESS;
1204 }
1205 
CheckRawSurfaceAlignment(PMOS_SURFACE surface)1206 MOS_STATUS CodechalEncodeCscDs::CheckRawSurfaceAlignment(PMOS_SURFACE surface)
1207 {
1208     CODECHAL_ENCODE_FUNCTION_ENTER;
1209 
1210     MOS_SURFACE resDetails;
1211     MOS_ZeroMemory(&resDetails, sizeof(resDetails));
1212     resDetails.Format = Format_Invalid;
1213     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, &surface->OsResource, &resDetails));
1214 
1215     if (resDetails.dwHeight % m_rawSurfAlignment)
1216     {
1217         CODECHAL_ENCODE_ASSERTMESSAGE("Raw surface alignment does not meet HW requirement!");
1218         return MOS_STATUS_INVALID_PARAMETER;
1219     }
1220 
1221     return MOS_STATUS_SUCCESS;
1222 }
1223 
SetHcpReconAlignment(uint8_t alignment)1224 void CodechalEncodeCscDs::SetHcpReconAlignment(uint8_t alignment)
1225 {
1226     m_hcpReconSurfAlignment = alignment;
1227 }
1228 
WaitCscSurface(MOS_GPU_CONTEXT gpuContext,bool readOnly)1229 MOS_STATUS CodechalEncodeCscDs::WaitCscSurface(MOS_GPU_CONTEXT gpuContext, bool readOnly)
1230 {
1231     CODECHAL_ENCODE_FUNCTION_ENTER;
1232 
1233     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1234 
1235     auto syncParams = g_cInitSyncParams;
1236     syncParams.GpuContext = gpuContext;
1237     syncParams.bReadOnly = readOnly;
1238     syncParams.presSyncResource = &m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER)->OsResource;
1239 
1240     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
1241     m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
1242 
1243     return eStatus;
1244 }
1245 
KernelFunctions(KernelParams * params)1246 MOS_STATUS CodechalEncodeCscDs::KernelFunctions(
1247     KernelParams* params)
1248 {
1249     CODECHAL_ENCODE_FUNCTION_ENTER;
1250 
1251     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1252 
1253     bool useDsConvInCombinedKernel = m_useCommonKernel
1254         && !(CODECHAL_AVC == m_standard || CODECHAL_MPEG2 == m_standard || CODECHAL_VP8 == m_standard);
1255 
1256     // call Ds+Copy
1257     if (m_cscFlag || useDsConvInCombinedKernel)
1258     {
1259         CODECHAL_ENCODE_CHK_STATUS_RETURN(CscKernel(params));
1260     }
1261 
1262     // call 4x DS
1263     if (m_scalingEnabled && !m_currRefList->b4xScalingUsed)
1264     {
1265         params->b32xScalingInUse = false;
1266         params->b16xScalingInUse = false;
1267         CODECHAL_ENCODE_CHK_STATUS_RETURN(DsKernel(params));
1268     }
1269 
1270     // call 16x/32x DS
1271     if (m_scalingEnabled && m_16xMeSupported)
1272     {
1273         //disable csc and reset colorFormat in 16x/32x stage since their inputs are 4x/16x DS results (only Y component)
1274         if(m_cscFlag && m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard)
1275         {
1276             m_colorRawSurface = cscColorNv12TileY;
1277             m_cscFlag = false;
1278         }
1279 
1280         // 4x downscaled images used as the input for 16x downscaling
1281         if (useDsConvInCombinedKernel)
1282         {
1283             params->stageDsConversion = dsStage16x;
1284             CODECHAL_ENCODE_CHK_STATUS_RETURN(CscKernel(params));
1285         }
1286         else
1287         {
1288             params->b16xScalingInUse = true;
1289             CODECHAL_ENCODE_CHK_STATUS_RETURN(DsKernel(params));
1290         }
1291 
1292         if (m_32xMeSupported)
1293         {
1294             // 16x downscaled images used as the input for 32x downscaling
1295             if (useDsConvInCombinedKernel)
1296             {
1297                 params->stageDsConversion = dsStage32x;
1298                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CscKernel(params));
1299             }
1300             else
1301             {
1302                 params->b32xScalingInUse = true;
1303                 params->b16xScalingInUse = false;
1304                 CODECHAL_ENCODE_CHK_STATUS_RETURN(DsKernel(params));
1305             }
1306         }
1307     }
1308 
1309     return MOS_STATUS_SUCCESS;
1310 }
1311 
CscUsingSfc(ENCODE_INPUT_COLORSPACE colorSpace)1312 MOS_STATUS CodechalEncodeCscDs::CscUsingSfc(ENCODE_INPUT_COLORSPACE colorSpace)
1313 {
1314     CODECHAL_ENCODE_FUNCTION_ENTER;
1315 
1316     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1317 
1318     // init SFC state
1319     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSfcState());
1320 
1321     // wait for raw surface on VEBox context
1322     auto syncParams = g_cInitSyncParams;
1323     syncParams.GpuContext = MOS_GPU_CONTEXT_VEBOX;
1324     syncParams.presSyncResource = &m_rawSurfaceToEnc->OsResource;
1325     syncParams.bReadOnly = true;
1326     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
1327     m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
1328 
1329     // allocate CSC surface (existing surfaces will be re-used when associated frame goes out of RefList)
1330     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurfaceCsc());
1331 
1332     if (m_encoder->m_trackedBuf->GetWaitCsc())
1333     {
1334         // on-demand sync for CSC surface re-use
1335         CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitCscSurface(MOS_GPU_CONTEXT_VEBOX, false));
1336     }
1337 
1338     CODECHAL_ENCODE_SFC_PARAMS sfcParams;
1339     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetParamsSfc(&sfcParams));
1340 
1341     // set-up color space
1342     switch (colorSpace)
1343     {
1344     case ECOLORSPACE_P601:
1345         m_sfcState->SetOutputColorSpace(MHW_CSpace_BT601);
1346         break;
1347     case ECOLORSPACE_P709:
1348         m_sfcState->SetOutputColorSpace(MHW_CSpace_BT709);
1349         break;
1350     case ECOLORSPACE_P2020:
1351         m_sfcState->SetOutputColorSpace(MHW_CSpace_BT2020);
1352         break;
1353     default:
1354         CODECHAL_ENCODE_ASSERTMESSAGE("Unknow input color space = %d!", colorSpace);
1355         eStatus = MOS_STATUS_INVALID_PARAMETER;
1356     }
1357 
1358     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->SetParams(
1359         &sfcParams));
1360 
1361     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->RenderStart(
1362         m_encoder));
1363 
1364     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesToEncPak());
1365 
1366     return eStatus;
1367 }
1368 
CscKernel(KernelParams * params)1369 MOS_STATUS CodechalEncodeCscDs::CscKernel(
1370     KernelParams* params)
1371 {
1372     CODECHAL_ENCODE_FUNCTION_ENTER;
1373 
1374     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1375 
1376     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1377 
1378     if (!m_cscKernelState)
1379     {
1380         CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscKernelState = MOS_New(MHW_KERNEL_STATE));
1381 
1382         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateCsc());
1383     }
1384 
1385     // allocate CSC surface (existing surfaces will be re-used when associated frame retires from RefList)
1386     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurfaceCsc());
1387 
1388     if (m_scalingEnabled)
1389     {
1390         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurfaceDS());
1391         if (m_standard == CODECHAL_VP9)
1392         {
1393             auto seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)(m_encoder->m_encodeParams.pSeqParams);
1394             CODECHAL_ENCODE_CHK_NULL_RETURN(seqParams);
1395             if (seqParams->SeqFlags.fields.EnableDynamicScaling) {
1396                 m_encoder->m_trackedBuf->ResizeSurfaceDS();
1397             }
1398         }
1399     }
1400 
1401     if (m_2xScalingEnabled)
1402     {
1403         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurface2xDS());
1404     }
1405 
1406     if (m_encoder->m_trackedBuf->GetWaitCsc())
1407     {
1408         // on-demand sync for CSC surface re-use
1409         CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitCscSurface(m_renderContext, false));
1410     }
1411 
1412     // setup kernel params
1413     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParamsCsc(params));
1414 
1415     if(m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard)
1416     {
1417         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1418             m_surfaceParamsCsc.psInputSurface,
1419             CodechalDbgAttr::attrEncodeRawInputSurface,
1420             m_curbeParams.downscaleStage == dsStage4x ? "4xDS_Input" : (m_curbeParams.downscaleStage == dsStage16x ? "16xDS_Input" : "32xDS_Input"))));
1421     }
1422 
1423     PerfTagSetting perfTag;
1424     perfTag.Value = 0;
1425     perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1426     perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_DS_CONVERSION_KERNEL;
1427     perfTag.PictureCodingType = m_encoder->m_pictureCodingType;
1428     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1429     // Each scaling kernel buffer counts as a separate perf task
1430     m_osInterface->pfnResetPerfBufferID(m_osInterface);
1431 
1432     // if Single Task Phase is not enabled, use BT count for the kernel state.
1433     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
1434     {
1435         auto maxBtCount = m_singleTaskPhaseSupported ?
1436             m_maxBtCount : m_cscKernelState->KernelParams.iBTCount;
1437         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->RequestSshSpaceForCmdBuf(maxBtCount));
1438         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1439         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->VerifySpaceAvailable());
1440     }
1441 
1442     // setup CscDsCopy DSH and Interface Descriptor
1443     auto stateHeapInterface = m_renderInterface->m_stateHeapInterface;
1444     CODECHAL_ENCODE_CHK_NULL_RETURN(stateHeapInterface);
1445     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1446         stateHeapInterface,
1447         m_cscKernelState,
1448         false,
1449         0,
1450         false,
1451         m_storeData));
1452 
1453     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1454     MOS_ZeroMemory(&idParams, sizeof(idParams));
1455     idParams.pKernelState = m_cscKernelState;
1456     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetInterfaceDescriptor(1, &idParams));
1457 
1458     // send CURBE
1459     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeCsc());
1460 
1461     if(m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard)
1462     {
1463         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1464                 m_curbeParams.downscaleStage == dsStage4x ? CODECHAL_MEDIA_STATE_4X_SCALING :
1465                         (m_curbeParams.downscaleStage == dsStage16x ? CODECHAL_MEDIA_STATE_16X_SCALING : CODECHAL_MEDIA_STATE_32X_SCALING),
1466                 m_cscKernelState)));
1467     }
1468 
1469     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_CSC_DS_COPY;
1470     CODECHAL_DEBUG_TOOL(
1471         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1472             encFunctionType,
1473             m_cscKernelState));
1474 
1475         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1476             encFunctionType,
1477             MHW_DSH_TYPE,
1478             m_cscKernelState));
1479     )
1480 
1481     MOS_COMMAND_BUFFER cmdBuffer;
1482     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1483 
1484     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
1485     sendKernelCmdsParams.EncFunctionType = encFunctionType;
1486     sendKernelCmdsParams.pKernelState = m_cscKernelState;
1487     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1488 
1489     // add binding table
1490     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetBindingTable(m_cscKernelState));
1491 
1492     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSurfaceCsc(&cmdBuffer));
1493 
1494     CODECHAL_DEBUG_TOOL(
1495         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1496             encFunctionType,
1497             MHW_SSH_TYPE,
1498             m_cscKernelState));
1499     )
1500 
1501         // If m_pollingSyncEnabled is set, insert HW semaphore to wait for external
1502         // raw surface processing to complete, before start CSC. Once the marker in
1503         // raw surface is overwritten by external operation, HW semaphore will be
1504         // signalled and CSC will start. This is to reduce SW latency between
1505         // external raw surface processing and CSC, in usages like remote gaming.
1506         if (m_pollingSyncEnabled)
1507         {
1508             MHW_MI_SEMAPHORE_WAIT_PARAMS miSemaphoreWaitParams;
1509             MOS_ZeroMemory((&miSemaphoreWaitParams), sizeof(miSemaphoreWaitParams));
1510             miSemaphoreWaitParams.presSemaphoreMem = &m_surfaceParamsCsc.psInputSurface->OsResource;
1511             miSemaphoreWaitParams.dwResourceOffset = m_syncMarkerOffset;
1512             miSemaphoreWaitParams.bPollingWaitMode = true;
1513             miSemaphoreWaitParams.dwSemaphoreData = m_syncMarkerValue;
1514             miSemaphoreWaitParams.CompareOperation = MHW_MI_SAD_NOT_EQUAL_SDD;
1515             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(&cmdBuffer, &miSemaphoreWaitParams));
1516         }
1517 
1518     HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__));
1519     HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderInterface->GetMmioRegisters());
1520     if (!m_encoder->m_computeContextEnabled)
1521     {
1522         MHW_WALKER_PARAMS walkerParams;
1523         MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
1524         walkerParams.WalkerMode = m_walkerMode;
1525         walkerParams.UseScoreboard = m_useHwScoreboard;
1526         walkerParams.BlockResolution.x =
1527             walkerParams.GlobalResolution.x =
1528             walkerParams.GlobalOutlerLoopStride.x = m_walkerResolutionX;
1529         walkerParams.BlockResolution.y =
1530             walkerParams.GlobalResolution.y =
1531             walkerParams.GlobalInnerLoopUnit.y = m_walkerResolutionY;
1532 
1533         //MAX VALUE
1534         walkerParams.dwLocalLoopExecCount = 0xFFFF;
1535         walkerParams.dwGlobalLoopExecCount = 0xFFFF;
1536 
1537         // Raster scan walking pattern
1538         walkerParams.LocalOutLoopStride.y = 1;
1539         walkerParams.LocalInnerLoopUnit.x = 1;
1540         walkerParams.LocalEnd.x = m_walkerResolutionX - 1;
1541 
1542         if (m_groupIdSelectSupported)
1543         {
1544             walkerParams.GroupIdLoopSelect = m_groupId;
1545         }
1546 
1547         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(&cmdBuffer, &walkerParams));
1548     }
1549     else
1550     {
1551         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetWalkerCmd(&cmdBuffer, m_cscKernelState));
1552     }
1553 
1554     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->EndStatusReport(&cmdBuffer, encFunctionType));
1555 
1556     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SubmitBlocks(m_cscKernelState));
1557 
1558     // If m_pollingSyncEnabled is set, write the marker to source surface for next MI_SEMAPHORE_WAIT to check.
1559     if (m_pollingSyncEnabled)
1560     {
1561         MHW_MI_STORE_DATA_PARAMS storeDataParams;
1562         storeDataParams.pOsResource      = &m_surfaceParamsCsc.psInputSurface->OsResource;
1563         storeDataParams.dwResourceOffset = m_syncMarkerOffset;
1564         storeDataParams.dwValue          = m_syncMarkerValue;
1565         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
1566     }
1567 
1568     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1569     {
1570         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->UpdateGlobalCmdBufId());
1571         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1572     }
1573 
1574     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1575         &cmdBuffer,
1576         encFunctionType,
1577         nullptr)));
1578 
1579     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(
1580         &cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
1581 
1582     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1583 
1584     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1585     {
1586         HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
1587         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1588         m_lastTaskInPhase = false;
1589     }
1590 
1591     if (dsDisabled == params->stageDsConversion && !(m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard))
1592     {
1593         // send appropriate surface to Enc/Pak depending on different CSC operation type
1594         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesToEncPak());
1595     }
1596 
1597     return eStatus;
1598 }
1599 
DsKernel(KernelParams * params)1600 MOS_STATUS CodechalEncodeCscDs::DsKernel(
1601     KernelParams* params)
1602 {
1603     CODECHAL_ENCODE_FUNCTION_ENTER;
1604 
1605     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1606 
1607     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1608 
1609     if (!m_firstField)
1610     {
1611         // Both fields are scaled when the first field comes in, no need to scale again
1612         return eStatus;
1613     }
1614 
1615     if (!m_dsKernelState)
1616     {
1617         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDS());
1618     }
1619 
1620     if (m_scalingEnabled)
1621     {
1622         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurfaceDS());
1623         if (m_standard == CODECHAL_VP9)
1624         {
1625             auto seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)(m_encoder->m_encodeParams.pSeqParams);
1626             CODECHAL_ENCODE_CHK_NULL_RETURN(seqParams);
1627             if (seqParams->SeqFlags.fields.EnableDynamicScaling) {
1628                 m_encoder->m_trackedBuf->ResizeSurfaceDS();
1629             }
1630         }
1631     }
1632 
1633     if (m_2xScalingEnabled)
1634     {
1635         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurface2xDS());
1636     }
1637 
1638     PerfTagSetting perfTag;
1639     perfTag.Value = 0;
1640     perfTag.Mode = m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1641     perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL;
1642     perfTag.PictureCodingType = m_encoder->m_pictureCodingType;
1643     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1644     m_osInterface->pfnIncPerfBufferID(m_osInterface);
1645     // Each scaling kernel buffer counts as a separate perf task
1646     m_osInterface->pfnResetPerfBufferID(m_osInterface);
1647 
1648     bool fieldPicture = CodecHal_PictureIsField(m_encoder->m_currOriginalPic);
1649     m_dsKernelState = params->b32xScalingInUse ?
1650         &m_encoder->m_scaling2xKernelStates[fieldPicture] :
1651         &m_encoder->m_scaling4xKernelStates[fieldPicture];
1652 
1653     // If Single Task Phase is not enabled, use BT count for the kernel state.
1654     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
1655     {
1656         auto maxBtCount = m_singleTaskPhaseSupported ?
1657             m_maxBtCount : m_dsKernelState->KernelParams.iBTCount;
1658         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->RequestSshSpaceForCmdBuf(maxBtCount));
1659         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1660         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->VerifySpaceAvailable());
1661     }
1662 
1663     //Setup Scaling DSH
1664     auto stateHeapInterface = m_renderInterface->m_stateHeapInterface;
1665     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1666         stateHeapInterface,
1667         m_dsKernelState,
1668         false,
1669         0,
1670         false,
1671         m_storeData));
1672 
1673     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1674     MOS_ZeroMemory(&idParams, sizeof(idParams));
1675     idParams.pKernelState = m_dsKernelState;
1676     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetInterfaceDescriptor(1, &idParams));
1677 
1678     uint32_t downscaledWidthInMb, downscaledHeightInMb;
1679     uint32_t inputFrameWidth, inputFrameHeight;
1680 
1681     if (params->b32xScalingInUse)
1682     {
1683         downscaledWidthInMb = m_downscaledWidth32x / CODECHAL_MACROBLOCK_WIDTH;
1684         downscaledHeightInMb = m_downscaledHeight32x / CODECHAL_MACROBLOCK_HEIGHT;
1685         if (fieldPicture)
1686         {
1687             downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
1688         }
1689 
1690         inputFrameWidth = m_downscaledWidth16x;
1691         inputFrameHeight = m_downscaledHeight16x;
1692 
1693         m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
1694         m_currRefList->b32xScalingUsed = true;
1695     }
1696     else if (params->b16xScalingInUse)
1697     {
1698         downscaledWidthInMb = m_downscaledWidth16x / CODECHAL_MACROBLOCK_WIDTH;
1699         downscaledHeightInMb = m_downscaledHeight16x / CODECHAL_MACROBLOCK_HEIGHT;
1700         if (fieldPicture)
1701         {
1702             downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
1703         }
1704 
1705         inputFrameWidth = m_downscaledWidth4x;
1706         inputFrameHeight = m_downscaledHeight4x;
1707 
1708         m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
1709         m_currRefList->b16xScalingUsed = true;
1710     }
1711     else
1712     {
1713         downscaledWidthInMb = m_downscaledWidth4x / CODECHAL_MACROBLOCK_WIDTH;
1714         downscaledHeightInMb = m_downscaledHeight4x / CODECHAL_MACROBLOCK_HEIGHT;
1715         if (fieldPicture)
1716         {
1717             downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
1718         }
1719 
1720         inputFrameWidth = m_encoder->m_oriFrameWidth;
1721         inputFrameHeight = m_encoder->m_oriFrameHeight;
1722 
1723         m_lastTaskInPhase = params->bLastTaskInPhase4xDS;
1724         m_currRefList->b4xScalingUsed = true;
1725     }
1726 
1727     CODEC_PICTURE originalPic = (params->bRawInputProvided) ? params->inputPicture : m_encoder->m_currOriginalPic;
1728     FeiPreEncParams *preEncParams = nullptr;
1729     if (m_encoder->m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC)
1730     {
1731         preEncParams = (FeiPreEncParams*)m_encoder->m_encodeParams.pPreEncParams;
1732         CODECHAL_ENCODE_CHK_NULL_RETURN(preEncParams);
1733     }
1734 
1735     bool scaling4xInUse = !(params->b32xScalingInUse || params->b16xScalingInUse);
1736     m_curbeParams.pKernelState = m_dsKernelState;
1737     m_curbeParams.dwInputPictureWidth = inputFrameWidth;
1738     m_curbeParams.dwInputPictureHeight = inputFrameHeight;
1739     m_curbeParams.b16xScalingInUse = params->b16xScalingInUse;
1740     m_curbeParams.b32xScalingInUse = params->b32xScalingInUse;
1741     m_curbeParams.bFieldPicture = fieldPicture;
1742     // Enable flatness check only for 4x scaling.
1743     m_curbeParams.bFlatnessCheckEnabled = scaling4xInUse && m_flatnessCheckEnabled;
1744     m_curbeParams.bMBVarianceOutputEnabled = m_curbeParams.bMBPixelAverageOutputEnabled =
1745         preEncParams ? !preEncParams->bDisableStatisticsOutput : scaling4xInUse && m_mbStatsEnabled;
1746     m_curbeParams.bBlock8x8StatisticsEnabled = preEncParams ? preEncParams->bEnable8x8Statistics : false;
1747 
1748     if (params->b32xScalingInUse)
1749     {
1750         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeDS2x());
1751     }
1752     else
1753     {
1754         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeDS4x());
1755     }
1756 
1757     auto encFunctionType = params->b32xScalingInUse ? CODECHAL_MEDIA_STATE_32X_SCALING :
1758         (params->b16xScalingInUse ? CODECHAL_MEDIA_STATE_16X_SCALING : CODECHAL_MEDIA_STATE_4X_SCALING);
1759     CODECHAL_DEBUG_TOOL(
1760         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1761             encFunctionType,
1762             MHW_DSH_TYPE,
1763             m_dsKernelState));
1764     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1765         encFunctionType,
1766         m_dsKernelState));
1767     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1768         encFunctionType,
1769         MHW_ISH_TYPE,
1770         m_dsKernelState));
1771     )
1772 
1773     MOS_COMMAND_BUFFER cmdBuffer;
1774     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1775 
1776     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
1777     sendKernelCmdsParams.EncFunctionType = encFunctionType;
1778     sendKernelCmdsParams.pKernelState = m_dsKernelState;
1779     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1780 
1781     // Add binding table
1782     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetBindingTable(m_dsKernelState));
1783     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfaceParamsDS(params));
1784     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSurfaceDS(&cmdBuffer));
1785 
1786     // Add dump for scaling surface state heap here
1787     CODECHAL_DEBUG_TOOL(
1788         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1789             encFunctionType,
1790             MHW_SSH_TYPE,
1791             m_dsKernelState));
1792     )
1793 
1794     uint32_t resolutionX, resolutionY;
1795     if (params->b32xScalingInUse)
1796     {
1797         resolutionX = downscaledWidthInMb;
1798         resolutionY = downscaledHeightInMb;
1799     }
1800     else
1801     {
1802         resolutionX = downscaledWidthInMb * 2; /* looping for Walker is needed at 8x8 block level */
1803         resolutionY = downscaledHeightInMb * 2;
1804         if (fieldPicture && (m_encoder->m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC))
1805         {
1806             resolutionY = MOS_ALIGN_CEIL(downscaledHeightInMb, 2) * 2;
1807         }
1808     }
1809 
1810     MHW_WALKER_PARAMS walkerParams;
1811     MOS_ZeroMemory(&walkerParams, sizeof(MHW_WALKER_PARAMS));
1812     walkerParams.WalkerMode = m_walkerMode;
1813     walkerParams.BlockResolution.x =
1814     walkerParams.GlobalResolution.x =
1815     walkerParams.GlobalOutlerLoopStride.x = resolutionX;
1816     walkerParams.BlockResolution.y =
1817     walkerParams.GlobalResolution.y =
1818     walkerParams.GlobalInnerLoopUnit.y = resolutionY;
1819     walkerParams.dwLocalLoopExecCount = 0xFFFF;  //MAX VALUE
1820     walkerParams.dwGlobalLoopExecCount = 0xFFFF;  //MAX VALUE
1821 
1822     // Raster scan walking pattern
1823     walkerParams.LocalOutLoopStride.y = 1;
1824     walkerParams.LocalInnerLoopUnit.x = 1;
1825     walkerParams.LocalEnd.x = resolutionX - 1;
1826 
1827     if (m_groupIdSelectSupported)
1828     {
1829         walkerParams.GroupIdLoopSelect = m_groupId;
1830     }
1831 
1832     HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__));
1833     HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderInterface->GetMmioRegisters());
1834 
1835     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(&cmdBuffer, &walkerParams));
1836 
1837     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->EndStatusReport(&cmdBuffer, encFunctionType));
1838 
1839     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SubmitBlocks(m_dsKernelState));
1840 
1841     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1842     {
1843         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->UpdateGlobalCmdBufId());
1844         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1845     }
1846 
1847     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1848         &cmdBuffer,
1849         encFunctionType,
1850         nullptr)));
1851 
1852     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(
1853         &cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
1854 
1855     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1856 
1857     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1858     {
1859         HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
1860         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1861         m_lastTaskInPhase = false;
1862     }
1863 
1864     return eStatus;
1865 }
1866 
RawSurfaceMediaCopy(MOS_FORMAT srcFormat)1867 MOS_STATUS CodechalEncodeCscDs::RawSurfaceMediaCopy(MOS_FORMAT srcFormat)
1868 {
1869     CODECHAL_ENCODE_FUNCTION_ENTER;
1870 
1871     // Call m_hwInterface->CreateMediaCopy directly for legacy code
1872     if (nullptr == m_mediaCopyBaseState)
1873     {
1874         m_mediaCopyBaseState = m_hwInterface->CreateMediaCopy(m_osInterface);
1875     }
1876     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mediaCopyBaseState);
1877 
1878     // Call raw surface Copy function
1879     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurfaceCopy(srcFormat));
1880 
1881     auto cscSurface = m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER);
1882 
1883     // Copy through VEBOX from Linear/TileY to TileY
1884     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mediaCopyBaseState->SurfaceCopy(
1885         &m_rawSurfaceToEnc->OsResource,
1886         &cscSurface->OsResource,
1887         MCPY_METHOD_BALANCE));
1888 
1889     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesToEncPak());
1890 
1891     return MOS_STATUS_SUCCESS;
1892 }
1893 
SetHevcCscFlagAndRawColor()1894 MOS_STATUS CodechalEncodeCscDs::SetHevcCscFlagAndRawColor()
1895 {
1896     CODECHAL_ENCODE_FUNCTION_ENTER;
1897 
1898     if(m_rawSurfaceToEnc->Format != Format_NV12 && CheckRawColorFormat(m_rawSurfaceToEnc->Format, m_rawSurfaceToEnc->TileType) == MOS_STATUS_SUCCESS)
1899     {
1900         m_cscFlag = true;
1901     }
1902 
1903     return MOS_STATUS_SUCCESS;
1904 }
1905 
CodechalEncodeCscDs(CodechalEncoderState * encoder)1906 CodechalEncodeCscDs::CodechalEncodeCscDs(CodechalEncoderState *encoder)
1907     : m_useRawForRef(encoder->m_useRawForRef),
1908       m_useCommonKernel(encoder->m_useCommonKernel),
1909       m_useHwScoreboard(encoder->m_useHwScoreboard),
1910       m_renderContextUsesNullHw(encoder->m_renderContextUsesNullHw),
1911       m_groupIdSelectSupported(encoder->m_groupIdSelectSupported),
1912       m_16xMeSupported(encoder->m_16xMeSupported),
1913       m_32xMeSupported(encoder->m_32xMeSupported),
1914       m_scalingEnabled(encoder->m_scalingEnabled),
1915       m_2xScalingEnabled(encoder->m_2xScalingEnabled),
1916       m_firstField(encoder->m_firstField),
1917       m_fieldScalingOutputInterleaved(encoder->m_fieldScalingOutputInterleaved),
1918       m_flatnessCheckEnabled(encoder->m_flatnessCheckEnabled),
1919       m_mbStatsEnabled(encoder->m_mbStatsEnabled),
1920       m_mbStatsSupported(encoder->m_mbStatsSupported),
1921       m_singleTaskPhaseSupported(encoder->m_singleTaskPhaseSupported),
1922       m_firstTaskInPhase(encoder->m_firstTaskInPhase),
1923       m_lastTaskInPhase(encoder->m_lastTaskInPhase),
1924       m_pollingSyncEnabled(encoder->m_pollingSyncEnabled),
1925       m_groupId(encoder->m_groupId),
1926       m_outputChromaFormat(encoder->m_outputChromaFormat),
1927       m_standard(encoder->m_standard),
1928       m_mode(encoder->m_mode),
1929       m_downscaledWidth4x(encoder->m_downscaledWidth4x),
1930       m_downscaledHeight4x(encoder->m_downscaledHeight4x),
1931       m_downscaledWidth16x(encoder->m_downscaledWidth16x),
1932       m_downscaledHeight16x(encoder->m_downscaledHeight16x),
1933       m_downscaledWidth32x(encoder->m_downscaledWidth32x),
1934       m_downscaledHeight32x(encoder->m_downscaledHeight32x),
1935       m_scaledBottomFieldOffset(encoder->m_scaledBottomFieldOffset),
1936       m_scaled16xBottomFieldOffset(encoder->m_scaled16xBottomFieldOffset),
1937       m_scaled32xBottomFieldOffset(encoder->m_scaled32xBottomFieldOffset),
1938       m_mbVProcStatsBottomFieldOffset(encoder->m_mbvProcStatsBottomFieldOffset),
1939       m_mbStatsBottomFieldOffset(encoder->m_mbStatsBottomFieldOffset),
1940       m_flatnessCheckBottomFieldOffset(encoder->m_flatnessCheckBottomFieldOffset),
1941       m_verticalLineStride(encoder->m_verticalLineStride),
1942       m_maxBtCount(encoder->m_maxBtCount),
1943       m_vmeStatesSize(encoder->m_vmeStatesSize),
1944       m_storeData(encoder->m_storeData),
1945       m_syncMarkerOffset(encoder->m_syncMarkerOffset),
1946       m_syncMarkerValue(encoder->m_syncMarkerValue),
1947       m_renderContext(encoder->m_renderContext),
1948       m_walkerMode(encoder->m_walkerMode),
1949       m_currRefList(encoder->m_currRefList),
1950       m_resMbStatsBuffer(encoder->m_resMbStatsBuffer),
1951       m_rawSurfaceToEnc(encoder->m_rawSurfaceToEnc),
1952       m_rawSurfaceToPak(encoder->m_rawSurfaceToPak)
1953 {
1954     // Initilize interface pointers
1955     m_encoder = encoder;
1956     m_osInterface = encoder->GetOsInterface();
1957     m_hwInterface = encoder->GetHwInterface();
1958     m_debugInterface = encoder->GetDebugInterface();
1959     m_miInterface = m_hwInterface->GetMiInterface();
1960     m_renderInterface = m_hwInterface->GetRenderInterface();
1961     m_stateHeapInterface = m_renderInterface->m_stateHeapInterface->pStateHeapInterface;
1962 
1963     m_cscFlag = m_cscDsConvEnable = 0;
1964 
1965     m_dsBTCount[0] = ds4xNumSurfaces;
1966     m_dsBTCount[1] = ds2xNumSurfaces;
1967     m_dsCurbeLength[0] = sizeof(Ds4xKernelCurbeData);
1968     m_dsCurbeLength[1] = sizeof(Ds2xKernelCurbeData);
1969     m_dsInlineDataLength = sizeof(DsKernelInlineData);
1970 }
1971 
//!
//! \brief    Destructor
//! \details  Releases the CSC kernel state, the SFC state and the media copy
//!           state through MOS_Delete.
//!
CodechalEncodeCscDs::~CodechalEncodeCscDs()
{
    // m_dsKernelState is not released here; DsKernel() points it at kernel
    // states stored in the encoder.
    MOS_Delete(m_cscKernelState);
    MOS_Delete(m_sfcState);
    MOS_Delete(m_mediaCopyBaseState);
}
1978