1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_csc_ds_g12.cpp
24 //! \brief    This file implements the Csc+Ds feature for all codecs on Gen12 platform
25 //!
26 
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_csc_ds_g12.h"
29 #include "codechal_encode_sfc_g12.h"
30 #include "codechal_kernel_header_g12.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
33 #include "igcodeckrn_g12.h"
34 #endif
35 #if USE_CODECHAL_DEBUG_TOOL
36 #include "codechal_debug_encode_par_g12.h"
37 #endif
38 
GetBTCount() const39 uint8_t CodechalEncodeCscDsG12::GetBTCount() const
40 {
41     return (uint8_t)cscNumSurfaces;
42 }
43 
AllocateSurfaceCsc()44 MOS_STATUS CodechalEncodeCscDsG12::AllocateSurfaceCsc()
45 {
46     CODECHAL_ENCODE_FUNCTION_ENTER;
47 
48     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
49 
50     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeCscDs::AllocateSurfaceCsc());
51 
52     MEDIA_WA_TABLE* waTable = m_osInterface->pfnGetWaTable(m_osInterface);
53     uint32_t memType = (MEDIA_IS_WA(waTable, WaForceAllocateLML4)) ? MOS_MEMPOOL_DEVICEMEMORY : 0;
54 
55     // allocate the MbStats surface
56     if (Mos_ResourceIsNull(&m_resMbStatsBuffer))
57     {
58         MOS_ALLOC_GFXRES_PARAMS    allocParamsForBufferLinear;
59         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
60         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
61         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
62         allocParamsForBufferLinear.Format = Format_Buffer;
63         uint32_t alignedWidth = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameWidth), 64);
64         uint32_t alignedHeight = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameHeight), 64);
65         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize =
66             MOS_ALIGN_CEIL((alignedWidth * alignedHeight << 6) , 1024);
67         allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
68         allocParamsForBufferLinear.dwMemType = memType;
69 
70         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
71             m_osInterface,
72             &allocParamsForBufferLinear,
73             &m_resMbStatsBuffer), "Failed to allocate  MB Statistics Buffer.");
74     }
75 
76     return eStatus;
77 }
78 
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)79 MOS_STATUS CodechalEncodeCscDsG12::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
80 {
81     CODECHAL_ENCODE_FUNCTION_ENTER;
82 
83     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
84 
85     // check input color format, and set target traverse thread space size
86     switch (format)
87     {
88     case Format_NV12:
89         m_colorRawSurface = cscColorNv12Linear;
90         m_cscRequireColor = 1;
91         break;
92     case Format_YUY2:
93     case Format_YUYV:
94         m_colorRawSurface = cscColorYUY2;
95         m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
96         m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
97         break;
98     case Format_A8R8G8B8:
99     case Format_X8R8G8B8:
100         m_colorRawSurface = cscColorARGB;
101         m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
102         m_cscRequireColor = 1;
103         //Use EU for better performance in big resolution cases
104         if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088
105             && !MEDIA_IS_WA(m_hwInterface->GetWaTable(), Wa_1409932735))
106         {
107             m_cscUsingSfc = 0;
108         }
109         break;
110     case Format_A8B8G8R8:
111         m_colorRawSurface = cscColorABGR;
112         m_cscRequireColor = 1;
113         break;
114     case Format_P010:
115     case Format_P016:
116         m_colorRawSurface = cscColorP010;
117         m_cscRequireConvTo8bPlanar = 1;
118         break;
119     case Format_Y210:
120         if (m_encoder->m_vdencEnabled && MEDIA_IS_WA(m_encoder->m_waTable, WaHEVCVDEncY210LinearInputNotSupported))
121         {
122             if (tileType == MOS_TILE_Y)
123             {
124                 m_colorRawSurface = cscColorY210;
125                 m_cscRequireConvTo8bPlanar = 1;
126             }
127             else
128             {
129                 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format Y210 Linear not yet supported!");
130                 eStatus = MOS_STATUS_PLATFORM_NOT_SUPPORTED;
131             }
132         }
133         else
134         {
135             m_colorRawSurface = cscColorY210;
136             m_cscRequireConvTo8bPlanar = 1;
137         }
138         break;
139     case Format_Y216:
140         m_colorRawSurface = cscColorY210;
141         m_cscRequireConvTo8bPlanar = 1;
142         break;
143     case Format_AYUV:
144         if (m_encoder->m_vdencEnabled)
145         {
146             m_colorRawSurface = cscColorAYUV;
147             m_cscRequireColor = 1;
148             break;
149         }
150     case Format_R10G10B10A2:
151         if (m_encoder->m_vdencEnabled)
152         {
153             m_colorRawSurface = cscColorARGB10;
154             break;
155         }
156     case Format_B10G10R10A2:
157         if (m_encoder->m_vdencEnabled)
158         {
159             m_colorRawSurface = cscColorABGR10;
160             break;
161         }
162     case Format_Y410:
163         if (m_encoder->m_vdencEnabled)
164         {
165             m_colorRawSurface = cscColorY410;
166             break;
167         }
168     case Format_YVYU:
169         if (m_encoder->m_vdencEnabled)
170         {
171             m_colorRawSurface = cscColorYUY2;
172             m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
173             m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
174             break;
175         }
176     case Format_P210:
177         // not supported yet so fall-thru to default
178         m_colorRawSurface = cscColorP210;
179         m_cscRequireConvTo8bPlanar = 1;
180     default:
181         CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not yet supported!", format);
182         eStatus = MOS_STATUS_INVALID_PARAMETER;
183         break;
184     }
185 
186     return eStatus;
187 }
188 
InitKernelStateCsc()189 MOS_STATUS CodechalEncodeCscDsG12::InitKernelStateCsc()
190 {
191     CODECHAL_ENCODE_FUNCTION_ENTER;
192 
193     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
194 
195     CODECHAL_KERNEL_HEADER currKrnHeader;
196     auto kernelSize = m_combinedKernelSize;
197     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
198         m_kernelBase,
199         ENC_SCALING_CONVERSION,
200         0,
201         &currKrnHeader,
202         &kernelSize));
203 
204     m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
205     m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
206     m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
207     m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
208     m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
209     m_cscKernelState->KernelParams.iIdCount = 1;
210     m_cscKernelState->KernelParams.iInlineDataLength = m_cscCurbeLength;
211     m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
212     m_cscKernelState->KernelParams.pBinary =
213         m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
214     m_cscKernelState->KernelParams.iSize = kernelSize;
215 
216     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
217         m_cscKernelState->KernelParams.iBTCount,
218         &m_cscKernelState->dwSshSize,
219         &m_cscKernelState->dwBindingTableSize));
220 
221     CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
222     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
223 
224     m_maxBtCount += MOS_ALIGN_CEIL(cscNumSurfaces,m_renderInterface->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
225 
226     return eStatus;
227 }
228 
SetKernelParamsCsc(KernelParams * params)229 MOS_STATUS CodechalEncodeCscDsG12::SetKernelParamsCsc(KernelParams* params)
230 {
231     CODECHAL_ENCODE_FUNCTION_ENTER;
232 
233     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
234 
235     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
236 
237     m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
238 
239     auto inputFrameWidth = m_encoder->m_frameWidth;
240     auto inputFrameHeight = m_encoder->m_frameHeight;
241     auto inputSurface = m_rawSurfaceToEnc;
242     auto output4xDsSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
243     auto output2xDsSurface = m_encoder->m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
244     auto mbStatsSurface = &m_resMbStatsBuffer;
245 
246     m_curbeParams.bHevcEncHistorySum = false;
247     m_surfaceParamsCsc.hevcExtParams = nullptr;
248 
249     if (dsDisabled == params->stageDsConversion)
250     {
251         m_curbeParams.bConvertFlag = m_cscFlag != 0;
252 
253         if (m_2xScalingEnabled && m_scalingEnabled)
254         {
255             m_curbeParams.downscaleStage = dsStage2x4x;
256             m_currRefList->b4xScalingUsed =
257             m_currRefList->b2xScalingUsed = true;
258             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
259             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
260         }
261         else if (m_2xScalingEnabled)
262         {
263             m_curbeParams.downscaleStage = dsStage2x;
264             m_currRefList->b2xScalingUsed = true;
265             output4xDsSurface = nullptr;
266             mbStatsSurface = nullptr;
267             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
268             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
269         }
270         else if (m_scalingEnabled)
271         {
272             m_curbeParams.downscaleStage = dsStage4x;
273             m_currRefList->b4xScalingUsed = true;
274             output2xDsSurface = nullptr;
275             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
276             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
277         }
278         else
279         {
280             // do CSC only
281             m_curbeParams.downscaleStage = dsDisabled;
282             output4xDsSurface = nullptr;
283             output2xDsSurface = nullptr;
284             mbStatsSurface = nullptr;
285             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
286             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
287         }
288 
289         // history sum to be enabled only for the 4x stage
290         if (params->hevcExtParams)
291         {
292             auto hevcExtParam = (HevcExtKernelParams*)params->hevcExtParams;
293             m_curbeParams.bUseLCU32 = hevcExtParam->bUseLCU32;
294             m_curbeParams.bHevcEncHistorySum = hevcExtParam->bHevcEncHistorySum;
295             m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
296         }
297     }
298     else
299     {
300         // do 16x/32x downscaling
301         m_curbeParams.bConvertFlag = false;
302         mbStatsSurface = nullptr;
303 
304         if (dsStage16x == params->stageDsConversion)
305         {
306             m_currRefList->b16xScalingUsed = true;
307             m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
308             m_curbeParams.downscaleStage = dsStage16x;
309             inputFrameWidth = m_encoder->m_downscaledWidth4x << 2;
310             inputFrameHeight = m_encoder->m_downscaledHeight4x << 2;
311 
312             inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
313             output4xDsSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
314             output2xDsSurface = nullptr;
315             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
316             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
317         }
318         else if (dsStage32x == params->stageDsConversion)
319         {
320             m_currRefList->b32xScalingUsed = true;
321             m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
322             m_curbeParams.downscaleStage = dsStage2x;
323             inputFrameWidth = m_encoder->m_downscaledWidth16x;
324             inputFrameHeight = m_encoder->m_downscaledHeight16x;
325 
326             inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
327             output4xDsSurface = nullptr;
328             output2xDsSurface = m_encoder->m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
329             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
330             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
331         }
332     }
333 
334     // setup Curbe
335     m_curbeParams.dwInputPictureWidth = inputFrameWidth;
336     m_curbeParams.dwInputPictureHeight = inputFrameHeight;
337     m_curbeParams.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
338     m_curbeParams.bMBVarianceOutputEnabled = m_mbStatsEnabled;
339     m_curbeParams.bMBPixelAverageOutputEnabled = m_mbStatsEnabled;
340     m_curbeParams.bCscOrCopyOnly = !m_scalingEnabled || params->cscOrCopyOnly;
341     m_curbeParams.inputColorSpace = params->inputColorSpace;
342 
343     // setup surface states
344     m_surfaceParamsCsc.psInputSurface = inputSurface;
345     m_surfaceParamsCsc.psOutputCopiedSurface = m_curbeParams.bConvertFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
346     m_surfaceParamsCsc.psOutput4xDsSurface = output4xDsSurface;
347     m_surfaceParamsCsc.psOutput2xDsSurface = output2xDsSurface;
348     m_surfaceParamsCsc.presMBVProcStatsBuffer = mbStatsSurface;
349     m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
350 
351     if (dsStage16x == params->stageDsConversion)
352     {
353         // here to calculate the walker resolution, we need to use the input surface resolution.
354         // it is inputFrameWidth/height / 4 in 16xStage, becasue kernel internally will do this.
355         inputFrameWidth = inputFrameWidth >> 2;
356         inputFrameHeight = inputFrameHeight >> 2;
357     }
358 
359     // setup walker param
360     m_walkerResolutionX = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameWidth) >> 3;
361     m_walkerResolutionY = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameHeight) >> 3;
362 
363     return eStatus;
364 }
365 
SetCurbeCsc()366 MOS_STATUS CodechalEncodeCscDsG12::SetCurbeCsc()
367 {
368     CODECHAL_ENCODE_FUNCTION_ENTER;
369 
370     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
371 
372     CscKernelCurbeData curbe;
373 
374     curbe.DW0_OutputBitDepthForChroma = m_curbeParams.ucEncBitDepthChroma;
375     curbe.DW0_OutputBitDepthForLuma = m_curbeParams.ucEncBitDepthLuma;
376     curbe.DW0_RoundingEnable = 1;
377 
378     if (m_colorRawSurface == cscColorABGR || m_colorRawSurface == cscColorABGR10)
379     {
380         curbe.DW1_PictureFormat = (uint8_t)((m_colorRawSurface == cscColorABGR) ? cscColorARGB : cscColorARGB10); // Use cscColorARGB for ABGR CSC, just switch B and R coefficients
381     }
382     else
383     {
384         curbe.DW1_PictureFormat = (uint8_t)m_colorRawSurface;
385     }
386 
387     curbe.DW1_ConvertFlag = m_curbeParams.bConvertFlag;
388     curbe.DW1_DownscaleStage = (uint8_t)m_curbeParams.downscaleStage;
389     curbe.DW1_MbStatisticsDumpFlag = (m_curbeParams.downscaleStage == dsStage4x || m_curbeParams.downscaleStage == dsStage2x4x);
390     curbe.DW1_YUY2ConversionFlag = (m_colorRawSurface == cscColorYUY2) && m_cscRequireColor;
391     curbe.DW1_HevcEncHistorySum = m_curbeParams.bHevcEncHistorySum;
392     curbe.DW1_LCUSize = m_curbeParams.bUseLCU32;
393 
394     curbe.DW2_OriginalPicWidthInSamples = m_curbeParams.dwInputPictureWidth;
395     curbe.DW2_OriginalPicHeightInSamples = m_curbeParams.dwInputPictureHeight;
396 
397     // RGB->YUV CSC coefficients
398     if (m_curbeParams.inputColorSpace == ECOLORSPACE_P709)
399     {
400         curbe.DW4_CSC_Coefficient_C0 = 0xFFCD;
401         curbe.DW5_CSC_Coefficient_C3 = 0x0080;
402         curbe.DW6_CSC_Coefficient_C4 = 0x004F;
403         curbe.DW7_CSC_Coefficient_C7 = 0x0010;
404         curbe.DW8_CSC_Coefficient_C8 = 0xFFD5;
405         curbe.DW9_CSC_Coefficient_C11 = 0x0080;
406         if (cscColorARGB == m_colorRawSurface || cscColorARGB10 == m_colorRawSurface)
407         {
408             curbe.DW4_CSC_Coefficient_C1 = 0xFFFB;
409             curbe.DW5_CSC_Coefficient_C2 = 0x0038;
410             curbe.DW6_CSC_Coefficient_C5 = 0x0008;
411             curbe.DW7_CSC_Coefficient_C6 = 0x0017;
412             curbe.DW8_CSC_Coefficient_C9 = 0x0038;
413             curbe.DW9_CSC_Coefficient_C10 = 0xFFF3;
414         }
415         else // cscColorABGR == m_colorRawSurface || cscColorABGR10 == m_colorRawSurface
416         {
417             curbe.DW4_CSC_Coefficient_C1 = 0x0038;
418             curbe.DW5_CSC_Coefficient_C2 = 0xFFFB;
419             curbe.DW6_CSC_Coefficient_C5 = 0x0017;
420             curbe.DW7_CSC_Coefficient_C6 = 0x0008;
421             curbe.DW8_CSC_Coefficient_C9 = 0xFFF3;
422             curbe.DW9_CSC_Coefficient_C10 = 0x0038;
423         }
424     }
425     else if (m_curbeParams.inputColorSpace == ECOLORSPACE_P601)
426     {
427         curbe.DW4_CSC_Coefficient_C0 = 0xFFD1;
428         curbe.DW5_CSC_Coefficient_C3 = 0x0080;
429         curbe.DW6_CSC_Coefficient_C4 = 0x0041;
430         curbe.DW7_CSC_Coefficient_C7 = 0x0010;
431         curbe.DW8_CSC_Coefficient_C8 = 0xFFDB;
432         curbe.DW9_CSC_Coefficient_C11 = 0x0080;
433         if (cscColorARGB == m_colorRawSurface || cscColorARGB10 == m_colorRawSurface)
434         {
435             curbe.DW4_CSC_Coefficient_C1 = 0xFFF7;
436             curbe.DW5_CSC_Coefficient_C2 = 0x0038;
437             curbe.DW6_CSC_Coefficient_C5 = 0x000D;
438             curbe.DW7_CSC_Coefficient_C6 = 0x0021;
439             curbe.DW8_CSC_Coefficient_C9 = 0x0038;
440             curbe.DW9_CSC_Coefficient_C10 = 0xFFED;
441         }
442         else // cscColorABGR == m_colorRawSurface || cscColorABGR10 == m_colorRawSurface
443         {
444             curbe.DW4_CSC_Coefficient_C1 = 0x0038;
445             curbe.DW5_CSC_Coefficient_C2 = 0xFFF7;
446             curbe.DW6_CSC_Coefficient_C5 = 0x0021;
447             curbe.DW7_CSC_Coefficient_C6 = 0x000D;
448             curbe.DW8_CSC_Coefficient_C9 = 0xFFED;
449             curbe.DW9_CSC_Coefficient_C10 = 0x0038;
450         }
451     }
452     else
453     {
454         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ARGB input color space = %d!", m_curbeParams.inputColorSpace);
455         return MOS_STATUS_INVALID_PARAMETER;
456     }
457 
458     curbe.DW10_BTI_InputSurface = cscSrcYPlane;
459     curbe.DW11_BTI_Enc8BitSurface = cscDstConvYPlane;
460     curbe.DW12_BTI_4xDsSurface = cscDst4xDs;
461     curbe.DW13_BTI_MbStatsSurface = cscDstMbStats;
462     curbe.DW14_BTI_2xDsSurface = cscDst2xDs;
463     curbe.DW15_BTI_HistoryBuffer = cscDstHistBuffer;
464     curbe.DW16_BTI_HistorySumBuffer = cscDstHistSum;
465     curbe.DW17_BTI_MultiTaskBuffer = cscDstMultiTask;
466 
467     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
468         &curbe,
469         m_cscKernelState->dwCurbeOffset,
470         sizeof(curbe)));
471 
472     return eStatus;
473 }
474 
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)475 MOS_STATUS CodechalEncodeCscDsG12::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
476 {
477     CODECHAL_ENCODE_FUNCTION_ENTER;
478 
479     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
480 
481     // PAK input surface (could be 10-bit)
482     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
483     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
484     surfaceParams.bIs2DSurface = true;
485     surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
486         cscColorP010 == m_colorRawSurface ||
487         cscColorP210 == m_colorRawSurface ||
488         cscColorNv12Linear == m_colorRawSurface);
489     surfaceParams.bMediaBlockRW = true;
490 
491     // Configure to R16/32 for input surface
492     if (m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt)
493     {
494         // 32x scaling requires R16_UNROM
495         surfaceParams.bUse16UnormSurfaceFormat = true;
496     }
497     else if (m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt)
498     {
499         surfaceParams.bUse32UnormSurfaceFormat = true;
500     }
501     else
502     {
503         /*
504         * Unify surface format to avoid mismatches introduced by DS kernel between MMC on and off cases.
505         * bUseCommonKernel        | FormatIsNV12 | MmcdOn | SurfaceFormatToUse
506         *            1            |       1      |  0/1   |        R8
507         *            1            |       0      |  0/1   |        R16
508         *            0            |       1      |  0/1   |        R8
509         *            0            |       0      |   1    |        R8
510         *            0            |       0      |   0    |        R32
511         */
512         surfaceParams.bUse16UnormSurfaceFormat = !(cscColorNv12TileY == m_colorRawSurface ||
513                                                    cscColorNv12Linear == m_colorRawSurface);
514     }
515 
516     if (m_encoder->m_vdencEnabled && (CODECHAL_HEVC == m_standard || CODECHAL_AVC == m_standard))
517     {
518         surfaceParams.bCheckCSC8Format= true;
519     }
520 
521     surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
522     if (cscColorNv12Linear == m_colorRawSurface)
523     {
524         surfaceParams.dwHeightInUse = (surfaceParams.psSurface->dwHeight * 3) / 2;
525     }
526     surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
527         MOS_CODEC_RESOURCE_USAGE_ORIGINAL_UNCOMPRESSED_PICTURE_ENCODE,
528         (codechalL3 | codechalLLC));
529 
530 #ifdef _MMC_SUPPORTED
531     CODECHAL_ENCODE_CHK_NULL_RETURN(m_encoder->m_mmcState);
532     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_mmcState->SetSurfaceParams(&surfaceParams));
533 
534     // disable compression for render RC TA resources
535     if (surfaceParams.psSurface->MmcState == MOS_MEMCOMP_RC &&
536         surfaceParams.psSurface->OsResource.pGmmResInfo->GetArraySize() > 1)
537     {
538         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnDecompResource(m_osInterface, &surfaceParams.psSurface->OsResource));
539         surfaceParams.psSurface->MmcState = MOS_MEMCOMP_DISABLED;
540     }
541 #endif
542 
543     surfaceParams.dwBindingTableOffset = cscSrcYPlane;
544     surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
545     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
546         m_hwInterface,
547         cmdBuffer,
548         &surfaceParams,
549         m_cscKernelState));
550 
551     // Converted NV12 output surface, or ENC 8-bit output surface
552     if (m_surfaceParamsCsc.psOutputCopiedSurface)
553     {
554         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
555         surfaceParams.bIs2DSurface =
556             surfaceParams.bUseUVPlane =
557             surfaceParams.bMediaBlockRW =
558             surfaceParams.bIsWritable = true;
559         surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
560         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
561             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
562             codechalLLC);
563 
564         surfaceParams.dwBindingTableOffset = cscDstConvYPlane;
565         surfaceParams.dwUVBindingTableOffset = cscDstConvUVlane;
566         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
567             m_hwInterface,
568             cmdBuffer,
569             &surfaceParams,
570             m_cscKernelState));
571     }
572 
573     // 4x downscaled surface
574     if (m_surfaceParamsCsc.psOutput4xDsSurface)
575     {
576         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
577         surfaceParams.bIs2DSurface =
578             surfaceParams.bMediaBlockRW =
579             surfaceParams.bIsWritable = true;
580         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
581         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
582             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
583             codechalLLC);
584         surfaceParams.dwBindingTableOffset = cscDst4xDs;
585         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
586             m_hwInterface,
587             cmdBuffer,
588             &surfaceParams,
589             m_cscKernelState));
590     }
591 
592     // MB Stats surface
593     if (m_surfaceParamsCsc.presMBVProcStatsBuffer)
594     {
595         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
596         surfaceParams.dwSize = m_hwInterface->m_avcMbStatBufferSize;
597         surfaceParams.bIsWritable = true;
598         surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
599         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
600             MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE,
601             codechalLLC);
602         surfaceParams.dwBindingTableOffset = cscDstMbStats;
603         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
604             m_hwInterface,
605             cmdBuffer,
606             &surfaceParams,
607             m_cscKernelState));
608     }
609 
610     // 2x downscaled surface
611     if (m_surfaceParamsCsc.psOutput2xDsSurface)
612     {
613         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
614         surfaceParams.bIs2DSurface =
615             surfaceParams.bMediaBlockRW =
616             surfaceParams.bIsWritable = true;
617         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput2xDsSurface;
618         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
619             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
620             codechalLLC);
621         surfaceParams.dwBindingTableOffset = cscDst2xDs;
622         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
623             m_hwInterface,
624             cmdBuffer,
625             &surfaceParams,
626             m_cscKernelState));
627     }
628 
629     if (m_surfaceParamsCsc.hevcExtParams)
630     {
631         auto hevcExtParams = (HevcExtKernelParams*)m_surfaceParamsCsc.hevcExtParams;
632 
633         // History buffer
634         if (hevcExtParams->presHistoryBuffer)
635         {
636             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
637             surfaceParams.dwSize = hevcExtParams->dwSizeHistoryBuffer;
638             surfaceParams.dwOffset = hevcExtParams->dwOffsetHistoryBuffer;
639             surfaceParams.bIsWritable = true;
640             surfaceParams.presBuffer = hevcExtParams->presHistoryBuffer;
641             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
642                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
643                 codechalLLC);
644             surfaceParams.dwBindingTableOffset = cscDstHistBuffer;
645             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
646                 m_hwInterface,
647                 cmdBuffer,
648                 &surfaceParams,
649                 m_cscKernelState));
650         }
651 
652         // History sum output buffer
653         if (hevcExtParams->presHistorySumBuffer)
654         {
655             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
656             surfaceParams.dwSize = hevcExtParams->dwSizeHistorySumBuffer;
657             surfaceParams.dwOffset = hevcExtParams->dwOffsetHistorySumBuffer;
658             surfaceParams.bIsWritable = true;
659             surfaceParams.presBuffer = hevcExtParams->presHistorySumBuffer;
660             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
661                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
662                 codechalLLC);
663             surfaceParams.dwBindingTableOffset = cscDstHistSum;
664             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
665                 m_hwInterface,
666                 cmdBuffer,
667                 &surfaceParams,
668                 m_cscKernelState));
669         }
670 
671         // multi-thread task buffer
672         if (hevcExtParams->presMultiThreadTaskBuffer)
673         {
674             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
675             surfaceParams.dwSize = hevcExtParams->dwSizeMultiThreadTaskBuffer;
676             surfaceParams.dwOffset = hevcExtParams->dwOffsetMultiThreadTaskBuffer;
677             surfaceParams.bIsWritable = true;
678             surfaceParams.presBuffer = hevcExtParams->presMultiThreadTaskBuffer;
679             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
680                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
681                 codechalLLC);
682             surfaceParams.dwBindingTableOffset = cscDstMultiTask;
683             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
684                 m_hwInterface,
685                 cmdBuffer,
686                 &surfaceParams,
687                 m_cscKernelState));
688         }
689     }
690 
691     return eStatus;
692 }
693 
InitKernelStateDS()694 MOS_STATUS CodechalEncodeCscDsG12::InitKernelStateDS()
695 {
696     CODECHAL_ENCODE_FUNCTION_ENTER;
697 
698     m_dsBTCount[0] = ds4xNumSurfaces;
699     m_dsCurbeLength[0] =
700     m_dsInlineDataLength = sizeof(Ds4xKernelCurbeData);
701     m_dsBTISrcY = ds4xSrcYPlane;
702     m_dsBTIDstY = ds4xDstYPlane;
703     m_dsBTISrcYTopField = ds4xSrcYPlaneTopField;
704     m_dsBTIDstYTopField = ds4xDstYPlaneTopField;
705     m_dsBTISrcYBtmField = ds4xSrcYPlaneBtmField;
706     m_dsBTIDstYBtmField = ds4xDstYPlaneBtmField;
707     m_dsBTIDstMbVProc = ds4xDstMbVProc;
708     m_dsBTIDstMbVProcTopField = ds4xDstMbVProcTopField;
709     m_dsBTIDstMbVProcBtmField = ds4xDstMbVProcBtmField;
710 
711     uint32_t kernelSize, numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
712     m_dsKernelBase = m_kernelBase;
713     CODECHAL_KERNEL_HEADER currKrnHeader;
714     for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
715     {
716         kernelSize = m_combinedKernelSize;
717         m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
718 
719         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
720             m_dsKernelBase,
721             ENC_SCALING4X,
722             krnStateIdx,
723             &currKrnHeader,
724             &kernelSize))
725 
726         m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
727         m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
728         m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
729         m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
730         m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
731         m_dsKernelState->KernelParams.iIdCount = 1;
732         m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
733 
734         m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
735         m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
736         m_dsKernelState->KernelParams.iSize = kernelSize;
737         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
738             m_dsKernelState->KernelParams.iBTCount,
739             &m_dsKernelState->dwSshSize,
740             &m_dsKernelState->dwBindingTableSize));
741 
742         CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
743         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
744 
745         if (m_32xMeSupported)
746         {
747             m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
748 
749             CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
750                 m_dsKernelBase,
751                 ENC_SCALING2X,
752                 krnStateIdx,
753                 &currKrnHeader,
754                 &kernelSize))
755 
756             m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
757             m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
758             m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
759             m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
760             m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
761 
762             m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
763             m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
764             m_dsKernelState->KernelParams.iSize = kernelSize;
765             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
766                 m_dsKernelState->KernelParams.iBTCount,
767                 &m_dsKernelState->dwSshSize,
768                 &m_dsKernelState->dwBindingTableSize));
769 
770             CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
771             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
772         }
773 
774         if (m_encoder->m_interlacedFieldDisabled)
775         {
776             m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
777 
778             if (m_32xMeSupported)
779             {
780                 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
781             }
782         }
783     }
784 
785     return MOS_STATUS_SUCCESS;
786 }
787 
SetCurbeDS4x()788 MOS_STATUS CodechalEncodeCscDsG12::SetCurbeDS4x()
789 {
790     CODECHAL_ENCODE_FUNCTION_ENTER;
791 
792     if (CODECHAL_AVC != m_standard)
793     {
794         return CodechalEncodeCscDs::SetCurbeDS4x();
795     }
796 
797     Ds4xKernelCurbeData curbe;
798 
799     curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
800     curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
801 
802     curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
803     curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
804 
805     if (m_curbeParams.bFieldPicture)
806     {
807         curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
808         curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
809     }
810 
811     if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
812     {
813         curbe.DW5_FlatnessThreshold = 128;
814     }
815 
816     // For gen10 DS kernel, If Flatness Check enabled, need enable MBVariance as well. Otherwise will not output MbIsFlat.
817     curbe.DW6_EnableMBVarianceOutput = curbe.DW6_EnableMBFlatnessCheck || m_curbeParams.bMBVarianceOutputEnabled;
818     curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
819     curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
820 
821     if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
822     {
823         curbe.DW8_MBVProcStatsBTIFrame = ds4xDstMbVProc;
824 
825         if (m_curbeParams.bFieldPicture)
826         {
827             curbe.DW9_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
828         }
829     }
830 
831     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
832         &curbe,
833         m_dsKernelState->dwCurbeOffset,
834         sizeof(curbe)));
835 
836     CODECHAL_DEBUG_TOOL(
837         if (m_encoder->m_encodeParState)
838         {
839             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_encodeParState->PopulateDsParam(&curbe));
840         }
841     )
842 
843     return MOS_STATUS_SUCCESS;
844 }
845 
InitSfcState()846 MOS_STATUS CodechalEncodeCscDsG12::InitSfcState()
847 {
848     CODECHAL_ENCODE_FUNCTION_ENTER;
849 
850     if (!m_sfcState)
851     {
852         m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfcG12);
853         CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
854 
855         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
856 
857         m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
858     }
859     return MOS_STATUS_SUCCESS;
860 }
861 
SurfaceNeedsExtraCopy()862 MOS_STATUS CodechalEncodeCscDsG12::SurfaceNeedsExtraCopy()
863 {
864     m_needsExtraCopy = true;
865     return MOS_STATUS_SUCCESS;
866 }
867 
CheckRawSurfaceAlignment(MOS_SURFACE surface)868 MOS_STATUS CodechalEncodeCscDsG12::CheckRawSurfaceAlignment(MOS_SURFACE surface)
869 {
870     if (m_cscEnableCopy && m_needsExtraCopy)
871     {
872         if (surface.Format == Format_A8R8G8B8) // not touch NV12 logic.
873         {
874             m_colorRawSurface = cscColorARGB;
875             m_cscRequireCopy = 1;
876         }
877     }
878 
879     if (m_cscEnableCopy && (surface.dwWidth % m_rawSurfAlignment || surface.dwHeight % m_rawSurfAlignment) &&
880         m_colorRawSurface != cscColorNv12TileY)
881     {
882         m_cscRequireCopy = 1;
883     }
884 
885     return MOS_STATUS_SUCCESS;
886 }
887 
CodechalEncodeCscDsG12(CodechalEncoderState * encoder)888 CodechalEncodeCscDsG12::CodechalEncodeCscDsG12(CodechalEncoderState* encoder)
889     : CodechalEncodeCscDs(encoder)
890 {
891     m_cscKernelUID = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
892     m_cscCurbeLength = sizeof(CscKernelCurbeData);
893 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
894     m_kernelBase = (uint8_t*)IGCODECKRN_G12;
895 #endif
896 }
897 
~CodechalEncodeCscDsG12()898 CodechalEncodeCscDsG12::~CodechalEncodeCscDsG12()
899 {
900     // free the MbStats surface
901     m_osInterface->pfnFreeResource(m_osInterface, &m_resMbStatsBuffer);
902 }
903