1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_csc_ds_g11.cpp
24 //! \brief    This file implements the Csc+Ds feature for all codecs on Gen11 platform
25 //!
26 
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_sfc_g11.h"
29 #include "codechal_encode_csc_ds_g11.h"
30 #include "codechal_kernel_header_g11.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS)
33 #include "igcodeckrn_g11.h"
34 #endif
35 #if USE_CODECHAL_DEBUG_TOOL
36 #include "codechal_debug_encode_par_g11.h"
37 #endif
38 
GetBTCount() const39 uint8_t CodechalEncodeCscDsG11::GetBTCount() const
40 {
41     return (uint8_t)cscNumSurfaces;
42 }
43 
AllocateSurfaceCsc()44 MOS_STATUS CodechalEncodeCscDsG11::AllocateSurfaceCsc()
45 {
46     CODECHAL_ENCODE_FUNCTION_ENTER;
47 
48     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
49 
50     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeCscDs::AllocateSurfaceCsc());
51 
52     // allocate the MbStats surface
53     if (Mos_ResourceIsNull(&m_resMbStatsBuffer))
54     {
55         MOS_ALLOC_GFXRES_PARAMS    allocParamsForBufferLinear;
56         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
57         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
58         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
59         allocParamsForBufferLinear.Format = Format_Buffer;
60         uint32_t alignedWidth = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameWidth), 64);
61         uint32_t alignedHeight = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameHeight), 64);
62         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize =
63             MOS_ALIGN_CEIL((alignedWidth * alignedHeight << 6) , 1024);
64         allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
65 
66         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
67             m_osInterface,
68             &allocParamsForBufferLinear,
69             &m_resMbStatsBuffer), "Failed to allocate  MB Statistics Buffer.");
70     }
71 
72     return eStatus;
73 }
74 
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)75 MOS_STATUS CodechalEncodeCscDsG11::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
76 {
77     CODECHAL_ENCODE_FUNCTION_ENTER;
78 
79     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
80 
81     // check input color format, and set target traverse thread space size
82     switch (format)
83     {
84     case Format_NV12:
85         m_colorRawSurface = cscColorNv12Linear;
86         m_cscRequireColor = 1;
87         break;
88     case Format_YUY2:
89     case Format_YUYV:
90         m_colorRawSurface = cscColorYUY2;
91         m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
92         m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
93         break;
94     case Format_A8R8G8B8:
95         m_colorRawSurface = cscColorARGB;
96         m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
97         m_cscRequireColor = 1;
98         //Use EU for better performance in big resolution cases
99         if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
100         {
101             m_cscUsingSfc = 0;
102         }
103         break;
104     case Format_A8B8G8R8:
105         m_colorRawSurface = cscColorABGR;
106         m_cscRequireColor = 1;
107         m_cscUsingSfc     = IsSfcEnabled() ? 1 : 0;
108         // Use EU for better performance in big resolution cases or TU1
109         if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
110         {
111             m_cscUsingSfc = 0;
112         }
113         break;
114     case Format_P010:
115         m_colorRawSurface = cscColorP010;
116         m_cscRequireConvTo8bPlanar = 1;
117         break;
118     case Format_Y210:
119         m_colorRawSurface = cscColorY210;
120         if (m_encoder->m_vdencEnabled)
121         {
122             CODECHAL_ENCODE_ASSERTMESSAGE("Input color format Y210 Linear or Tile X not yet supported!");
123             eStatus = MOS_STATUS_PLATFORM_NOT_SUPPORTED;
124         }
125         else
126         {
127             m_cscRequireConvTo8bPlanar = 1;
128         }
129         break;
130     case Format_AYUV:
131         if (m_encoder->m_vdencEnabled)
132         {
133             m_colorRawSurface = cscColorAYUV;
134             m_cscRequireColor = 1;
135             break;
136         }
137     case Format_P210:
138         // not supported yet so fall-thru to default
139         m_colorRawSurface = cscColorP210;
140         m_cscRequireConvTo8bPlanar = 1;
141     default:
142         CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not yet supported!", format);
143         eStatus = MOS_STATUS_INVALID_PARAMETER;
144         break;
145     }
146 
147     return eStatus;
148 }
149 
InitKernelStateCsc()150 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateCsc()
151 {
152     CODECHAL_ENCODE_FUNCTION_ENTER;
153 
154     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
155 
156     CODECHAL_KERNEL_HEADER currKrnHeader;
157     auto kernelSize = m_combinedKernelSize;
158     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
159         m_kernelBase,
160         ENC_SCALING_CONVERSION,
161         0,
162         &currKrnHeader,
163         &kernelSize));
164 
165     m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
166     m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
167     m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
168     m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
169     m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
170     m_cscKernelState->KernelParams.iIdCount = 1;
171     m_cscKernelState->KernelParams.iInlineDataLength = m_cscCurbeLength;
172     m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
173     m_cscKernelState->KernelParams.pBinary =
174         m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
175     m_cscKernelState->KernelParams.iSize = kernelSize;
176 
177     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
178         m_cscKernelState->KernelParams.iBTCount,
179         &m_cscKernelState->dwSshSize,
180         &m_cscKernelState->dwBindingTableSize));
181 
182     CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
183     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
184 
185     return eStatus;
186 }
187 
SetKernelParamsCsc(KernelParams * params)188 MOS_STATUS CodechalEncodeCscDsG11::SetKernelParamsCsc(KernelParams* params)
189 {
190     CODECHAL_ENCODE_FUNCTION_ENTER;
191 
192     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
193 
194     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
195 
196     m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
197 
198     auto inputFrameWidth = m_encoder->m_frameWidth;
199     auto inputFrameHeight = m_encoder->m_frameHeight;
200     auto inputSurface = m_rawSurfaceToEnc;
201     auto output4xDsSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
202     auto output2xDsSurface = m_encoder->m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
203     auto mbStatsSurface = &m_resMbStatsBuffer;
204 
205     m_curbeParams.bHevcEncHistorySum = false;
206     m_surfaceParamsCsc.hevcExtParams = nullptr;
207 
208     if (dsDisabled == params->stageDsConversion)
209     {
210         m_curbeParams.bConvertFlag = m_cscFlag != 0;
211 
212         if (m_2xScalingEnabled && m_scalingEnabled)
213         {
214             m_curbeParams.downscaleStage = dsStage2x4x;
215             m_currRefList->b4xScalingUsed =
216             m_currRefList->b2xScalingUsed = true;
217             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
218             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
219         }
220         else if (m_2xScalingEnabled)
221         {
222             m_curbeParams.downscaleStage = dsStage2x;
223             m_currRefList->b2xScalingUsed = true;
224             output4xDsSurface = nullptr;
225             mbStatsSurface = nullptr;
226             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
227             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
228         }
229         else if (m_scalingEnabled)
230         {
231             m_curbeParams.downscaleStage = dsStage4x;
232             m_currRefList->b4xScalingUsed = true;
233             output2xDsSurface = nullptr;
234             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
235             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
236         }
237         else
238         {
239             // do CSC only
240             m_curbeParams.downscaleStage = dsDisabled;
241             output4xDsSurface = nullptr;
242             output2xDsSurface = nullptr;
243             mbStatsSurface = nullptr;
244             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
245             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
246         }
247 
248         // history sum to be enabled only for the 4x stage
249         if (params->hevcExtParams)
250         {
251             auto hevcExtParam = (HevcExtKernelParams*)params->hevcExtParams;
252             m_curbeParams.bUseLCU32 = hevcExtParam->bUseLCU32;
253             m_curbeParams.bHevcEncHistorySum = hevcExtParam->bHevcEncHistorySum;
254             m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
255         }
256     }
257     else
258     {
259         // do 16x/32x downscaling
260         inputFrameWidth = m_encoder->m_downscaledWidth4x;
261         inputFrameHeight = m_encoder->m_downscaledHeight4x;
262         m_curbeParams.bConvertFlag = false;
263         mbStatsSurface = nullptr;
264 
265         if (dsStage16x == params->stageDsConversion)
266         {
267             m_currRefList->b16xScalingUsed = true;
268             m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
269             m_curbeParams.downscaleStage = dsStage16x;
270             inputFrameWidth = m_encoder->m_downscaledWidth4x << 2;
271             inputFrameHeight = m_encoder->m_downscaledHeight4x << 2;
272 
273             inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
274             output4xDsSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
275             output2xDsSurface = nullptr;
276             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
277             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
278         }
279         else if (dsStage32x == params->stageDsConversion)
280         {
281             m_currRefList->b32xScalingUsed = true;
282             m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
283             m_curbeParams.downscaleStage = dsStage2x;
284             inputFrameWidth = m_encoder->m_downscaledWidth16x;
285             inputFrameHeight = m_encoder->m_downscaledHeight16x;
286             inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
287             output4xDsSurface = nullptr;
288             output2xDsSurface = m_encoder->m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
289             m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
290             m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
291         }
292     }
293 
294     // setup Curbe
295     m_curbeParams.dwInputPictureWidth = inputFrameWidth;
296     m_curbeParams.dwInputPictureHeight = inputFrameHeight;
297     m_curbeParams.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
298     m_curbeParams.bMBVarianceOutputEnabled = m_mbStatsEnabled;
299     m_curbeParams.bMBPixelAverageOutputEnabled = m_mbStatsEnabled;
300     m_curbeParams.bCscOrCopyOnly = !m_scalingEnabled || params->cscOrCopyOnly;
301     m_curbeParams.inputColorSpace = params->inputColorSpace;
302 
303     // setup surface states
304     m_surfaceParamsCsc.psInputSurface = inputSurface;
305     m_surfaceParamsCsc.psOutputCopiedSurface = m_curbeParams.bConvertFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
306     m_surfaceParamsCsc.psOutput4xDsSurface = output4xDsSurface;
307     m_surfaceParamsCsc.psOutput2xDsSurface = output2xDsSurface;
308     m_surfaceParamsCsc.presMBVProcStatsBuffer = mbStatsSurface;
309     m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
310 
311     if (dsStage16x == params->stageDsConversion)
312     {
313         // here to calculate the walkder resolution, we need to use the input surface resolution.
314         // it is inputFrameWidth/height / 4 in 16xStage, becasue kernel internally will do this.
315         inputFrameWidth = inputFrameWidth >> 2;
316         inputFrameHeight = inputFrameHeight >> 2;
317     }
318 
319     // setup walker param
320     m_walkerResolutionX = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameWidth) >> 3;
321     m_walkerResolutionY = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameHeight) >> 3;
322 
323     return eStatus;
324 }
325 
SetCurbeCsc()326 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeCsc()
327 {
328     CODECHAL_ENCODE_FUNCTION_ENTER;
329 
330     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
331 
332     CscKernelCurbeData curbe;
333 
334     curbe.DW0_OutputBitDepthForChroma = m_curbeParams.ucEncBitDepthChroma;
335     curbe.DW0_OutputBitDepthForLuma = m_curbeParams.ucEncBitDepthLuma;
336     curbe.DW0_RoundingEnable = 1;
337 
338     curbe.DW1_PictureFormat = (uint8_t)((m_colorRawSurface == cscColorABGR) ? cscColorARGB : m_colorRawSurface); // Use cscColorARGB for ABGR CSC, just switch B and R coefficients
339     curbe.DW1_ConvertFlag = m_curbeParams.bConvertFlag;
340     curbe.DW1_DownscaleStage = (uint8_t)m_curbeParams.downscaleStage;
341     curbe.DW1_MbStatisticsDumpFlag = (m_curbeParams.downscaleStage == dsStage4x || m_curbeParams.downscaleStage == dsStage2x4x);
342     curbe.DW1_YUY2ConversionFlag = (m_colorRawSurface == cscColorYUY2) && m_cscRequireColor;
343     curbe.DW1_HevcEncHistorySum = m_curbeParams.bHevcEncHistorySum;
344     curbe.DW1_LCUSize = m_curbeParams.bUseLCU32;
345 
346     curbe.DW2_OriginalPicWidthInSamples = m_curbeParams.dwInputPictureWidth;
347     curbe.DW2_OriginalPicHeightInSamples = m_curbeParams.dwInputPictureHeight;
348 
349     // RGB->YUV CSC coefficients
350     if (m_curbeParams.inputColorSpace == ECOLORSPACE_P709)
351     {
352         curbe.DW4_CSC_Coefficient_C0 = 0xFFCD;
353         curbe.DW5_CSC_Coefficient_C3 = 0x0080;
354         curbe.DW6_CSC_Coefficient_C4 = 0x004F;
355         curbe.DW7_CSC_Coefficient_C7 = 0x0010;
356         curbe.DW8_CSC_Coefficient_C8 = 0xFFD5;
357         curbe.DW9_CSC_Coefficient_C11 = 0x0080;
358         if (cscColorARGB == m_colorRawSurface)
359         {
360             curbe.DW4_CSC_Coefficient_C1 = 0xFFFB;
361             curbe.DW5_CSC_Coefficient_C2 = 0x0038;
362             curbe.DW6_CSC_Coefficient_C5 = 0x0008;
363             curbe.DW7_CSC_Coefficient_C6 = 0x0017;
364             curbe.DW8_CSC_Coefficient_C9 = 0x0038;
365             curbe.DW9_CSC_Coefficient_C10 = 0xFFF3;
366         }
367         else // cscColorABGR == m_colorRawSurface
368         {
369             curbe.DW4_CSC_Coefficient_C1 = 0x0038;
370             curbe.DW5_CSC_Coefficient_C2 = 0xFFFB;
371             curbe.DW6_CSC_Coefficient_C5 = 0x0017;
372             curbe.DW7_CSC_Coefficient_C6 = 0x0008;
373             curbe.DW8_CSC_Coefficient_C9 = 0xFFF3;
374             curbe.DW9_CSC_Coefficient_C10 = 0x0038;
375         }
376     }
377     else if (m_curbeParams.inputColorSpace == ECOLORSPACE_P601)
378     {
379         curbe.DW4_CSC_Coefficient_C0 = 0xFFD1;
380         curbe.DW5_CSC_Coefficient_C3 = 0x0080;
381         curbe.DW6_CSC_Coefficient_C4 = 0x0041;
382         curbe.DW7_CSC_Coefficient_C7 = 0x0010;
383         curbe.DW8_CSC_Coefficient_C8 = 0xFFDB;
384         curbe.DW9_CSC_Coefficient_C11 = 0x0080;
385         if (cscColorARGB == m_colorRawSurface)
386         {
387             curbe.DW4_CSC_Coefficient_C1 = 0xFFF7;
388             curbe.DW5_CSC_Coefficient_C2 = 0x0038;
389             curbe.DW6_CSC_Coefficient_C5 = 0x000D;
390             curbe.DW7_CSC_Coefficient_C6 = 0x0021;
391             curbe.DW8_CSC_Coefficient_C9 = 0x0038;
392             curbe.DW9_CSC_Coefficient_C10 = 0xFFED;
393         }
394         else // cscColorABGR == m_colorRawSurface
395         {
396             curbe.DW4_CSC_Coefficient_C1 = 0x0038;
397             curbe.DW5_CSC_Coefficient_C2 = 0xFFF7;
398             curbe.DW6_CSC_Coefficient_C5 = 0x0021;
399             curbe.DW7_CSC_Coefficient_C6 = 0x000D;
400             curbe.DW8_CSC_Coefficient_C9 = 0xFFED;
401             curbe.DW9_CSC_Coefficient_C10 = 0x0038;
402         }
403     }
404     else
405     {
406         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ARGB input color space = %d!", m_curbeParams.inputColorSpace);
407         return MOS_STATUS_INVALID_PARAMETER;
408     }
409 
410     curbe.DW10_BTI_InputSurface = cscSrcYPlane;
411     curbe.DW11_BTI_Enc8BitSurface = cscDstConvYPlane;
412     curbe.DW12_BTI_4xDsSurface = cscDst4xDs;
413     curbe.DW13_BTI_MbStatsSurface = cscDstMbStats;
414     curbe.DW14_BTI_2xDsSurface = cscDst2xDs;
415     curbe.DW15_BTI_HistoryBuffer = cscDstHistBuffer;
416     curbe.DW16_BTI_HistorySumBuffer = cscDstHistSum;
417     curbe.DW17_BTI_MultiTaskBuffer = cscDstMultiTask;
418 
419     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
420         &curbe,
421         m_cscKernelState->dwCurbeOffset,
422         sizeof(curbe)));
423 
424     return eStatus;
425 }
426 
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)427 MOS_STATUS CodechalEncodeCscDsG11::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
428 {
429     CODECHAL_ENCODE_FUNCTION_ENTER;
430 
431     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
432 
433     // PAK input surface (could be 10-bit)
434     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
435     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
436     surfaceParams.bIs2DSurface = true;
437     surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
438         cscColorP010 == m_colorRawSurface ||
439         cscColorP210 == m_colorRawSurface ||
440         cscColorNv12Linear == m_colorRawSurface);
441     surfaceParams.bMediaBlockRW = true;
442 
443     // Configure to R16/32 for input surface
444     if (m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt)
445     {
446         // 32x scaling requires R16_UNROM
447         surfaceParams.bUse16UnormSurfaceFormat = true;
448     }
449     else if (m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt)
450     {
451         surfaceParams.bUse32UnormSurfaceFormat = true;
452     }
453     else
454     {
455         /*
456         * Unify surface format to avoid mismatches introduced by DS kernel between MMC on and off cases.
457         * bUseCommonKernel        | FormatIsNV12 | MmcdOn | SurfaceFormatToUse
458         *            1            |       1      |  0/1   |        R8
459         *            1            |       0      |  0/1   |        R16
460         *            0            |       1      |  0/1   |        R8
461         *            0            |       0      |   1    |        R8
462         *            0            |       0      |   0    |        R32
463         */
464         surfaceParams.bUse16UnormSurfaceFormat = !(cscColorNv12TileY == m_colorRawSurface ||
465                                                    cscColorNv12Linear == m_colorRawSurface);
466     }
467 
468     if (m_encoder->m_vdencEnabled && (CODECHAL_HEVC == m_standard || CODECHAL_AVC == m_standard))
469     {
470         surfaceParams.bCheckCSC8Format= true;
471     }
472 
473     surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
474     if (cscColorNv12Linear == m_colorRawSurface)
475     {
476         surfaceParams.dwHeightInUse = (surfaceParams.psSurface->dwHeight * 3) / 2;
477     }
478     surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
479         MOS_CODEC_RESOURCE_USAGE_ORIGINAL_UNCOMPRESSED_PICTURE_ENCODE,
480         (codechalL3 | codechalLLC));
481 
482     surfaceParams.dwBindingTableOffset = cscSrcYPlane;
483     surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
484     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
485         m_hwInterface,
486         cmdBuffer,
487         &surfaceParams,
488         m_cscKernelState));
489 
490     // Converted NV12 output surface, or ENC 8-bit output surface
491     if (m_surfaceParamsCsc.psOutputCopiedSurface)
492     {
493         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
494         surfaceParams.bIs2DSurface =
495         surfaceParams.bUseUVPlane =
496         surfaceParams.bMediaBlockRW =
497         surfaceParams.bIsWritable = true;
498         surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
499         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
500             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
501             codechalLLC);
502 
503         surfaceParams.dwBindingTableOffset = cscDstConvYPlane;
504         surfaceParams.dwUVBindingTableOffset = cscDstConvUVlane;
505         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
506             m_hwInterface,
507             cmdBuffer,
508             &surfaceParams,
509             m_cscKernelState));
510     }
511 
512     // 4x downscaled surface
513     if (m_surfaceParamsCsc.psOutput4xDsSurface)
514     {
515         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
516         surfaceParams.bIs2DSurface =
517         surfaceParams.bMediaBlockRW =
518         surfaceParams.bIsWritable = true;
519         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
520         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
521             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
522             codechalLLC);
523         surfaceParams.dwBindingTableOffset = cscDst4xDs;
524         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
525             m_hwInterface,
526             cmdBuffer,
527             &surfaceParams,
528             m_cscKernelState));
529     }
530 
531     // MB Stats surface
532     if (m_surfaceParamsCsc.presMBVProcStatsBuffer)
533     {
534         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
535         surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize);
536         surfaceParams.bIsWritable = true;
537         surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
538         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
539             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
540             codechalLLC);
541         surfaceParams.dwBindingTableOffset = cscDstMbStats;
542         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
543             m_hwInterface,
544             cmdBuffer,
545             &surfaceParams,
546             m_cscKernelState));
547     }
548 
549     // 2x downscaled surface
550     if (m_surfaceParamsCsc.psOutput2xDsSurface)
551     {
552         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
553         surfaceParams.bIs2DSurface =
554         surfaceParams.bMediaBlockRW =
555         surfaceParams.bIsWritable = true;
556         surfaceParams.psSurface = m_surfaceParamsCsc.psOutput2xDsSurface;
557         surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
558             MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
559             codechalLLC);
560         surfaceParams.dwBindingTableOffset = cscDst2xDs;
561         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
562             m_hwInterface,
563             cmdBuffer,
564             &surfaceParams,
565             m_cscKernelState));
566     }
567 
568     if (m_surfaceParamsCsc.hevcExtParams)
569     {
570         auto hevcExtParams = (HevcExtKernelParams*)m_surfaceParamsCsc.hevcExtParams;
571 
572         // History buffer
573         if (hevcExtParams->presHistoryBuffer)
574         {
575             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
576             surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistoryBuffer);
577             surfaceParams.dwOffset = hevcExtParams->dwOffsetHistoryBuffer;
578             surfaceParams.bIsWritable = true;
579             surfaceParams.presBuffer = hevcExtParams->presHistoryBuffer;
580             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
581                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
582                 codechalLLC);
583             surfaceParams.dwBindingTableOffset = cscDstHistBuffer;
584             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
585                 m_hwInterface,
586                 cmdBuffer,
587                 &surfaceParams,
588                 m_cscKernelState));
589         }
590 
591         // History sum output buffer
592         if (hevcExtParams->presHistorySumBuffer)
593         {
594             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
595             surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistorySumBuffer);
596             surfaceParams.dwOffset = hevcExtParams->dwOffsetHistorySumBuffer;
597             surfaceParams.bIsWritable = true;
598             surfaceParams.presBuffer = hevcExtParams->presHistorySumBuffer;
599             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
600                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
601                 codechalLLC);
602             surfaceParams.dwBindingTableOffset = cscDstHistSum;
603             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
604                 m_hwInterface,
605                 cmdBuffer,
606                 &surfaceParams,
607                 m_cscKernelState));
608         }
609 
610         // multi-thread task buffer
611         if (hevcExtParams->presMultiThreadTaskBuffer)
612         {
613             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
614             surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeMultiThreadTaskBuffer);
615             surfaceParams.dwOffset = hevcExtParams->dwOffsetMultiThreadTaskBuffer;
616             surfaceParams.bIsWritable = true;
617             surfaceParams.presBuffer = hevcExtParams->presMultiThreadTaskBuffer;
618             surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
619                 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
620                 codechalLLC);
621             surfaceParams.dwBindingTableOffset = cscDstMultiTask;
622             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
623                 m_hwInterface,
624                 cmdBuffer,
625                 &surfaceParams,
626                 m_cscKernelState));
627         }
628     }
629 
630     return eStatus;
631 }
632 
InitKernelStateDS()633 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateDS()
634 {
635     CODECHAL_ENCODE_FUNCTION_ENTER;
636 
637     m_dsBTCount[0] = ds4xNumSurfaces;
638     m_dsCurbeLength[0] =
639     m_dsInlineDataLength = sizeof(Ds4xKernelCurbeData);
640     m_dsBTISrcY = ds4xSrcYPlane;
641     m_dsBTIDstY = ds4xDstYPlane;
642     m_dsBTISrcYTopField = ds4xSrcYPlaneTopField;
643     m_dsBTIDstYTopField = ds4xDstYPlaneTopField;
644     m_dsBTISrcYBtmField = ds4xSrcYPlaneBtmField;
645     m_dsBTIDstYBtmField = ds4xDstYPlaneBtmField;
646     m_dsBTIDstMbVProc = ds4xDstMbVProc;
647     m_dsBTIDstMbVProcTopField = ds4xDstMbVProcTopField;
648     m_dsBTIDstMbVProcBtmField = ds4xDstMbVProcBtmField;
649 
650     uint32_t kernelSize, numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
651     m_dsKernelBase = m_kernelBase;
652     CODECHAL_KERNEL_HEADER currKrnHeader;
653     for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
654     {
655         kernelSize = m_combinedKernelSize;
656         m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
657 
658         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
659             m_dsKernelBase,
660             ENC_SCALING4X,
661             krnStateIdx,
662             &currKrnHeader,
663             &kernelSize))
664 
665         m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
666         m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
667         m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
668         m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
669         m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
670         m_dsKernelState->KernelParams.iIdCount = 1;
671         m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
672 
673         m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
674         m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
675         m_dsKernelState->KernelParams.iSize = kernelSize;
676         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
677             m_dsKernelState->KernelParams.iBTCount,
678             &m_dsKernelState->dwSshSize,
679             &m_dsKernelState->dwBindingTableSize));
680 
681         CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
682         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
683 
684         if (m_32xMeSupported)
685         {
686             m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
687 
688             CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
689                 m_dsKernelBase,
690                 ENC_SCALING2X,
691                 krnStateIdx,
692                 &currKrnHeader,
693                 &kernelSize))
694 
695             m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
696             m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
697             m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
698             m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
699             m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
700 
701             m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
702             m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
703             m_dsKernelState->KernelParams.iSize = kernelSize;
704             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
705                 m_dsKernelState->KernelParams.iBTCount,
706                 &m_dsKernelState->dwSshSize,
707                 &m_dsKernelState->dwBindingTableSize));
708 
709             CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
710             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
711         }
712 
713         if (m_encoder->m_interlacedFieldDisabled)
714         {
715             m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
716 
717             if (m_32xMeSupported)
718             {
719                 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
720             }
721         }
722     }
723 
724     return MOS_STATUS_SUCCESS;
725 }
726 
SetCurbeDS4x()727 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeDS4x()
728 {
729     CODECHAL_ENCODE_FUNCTION_ENTER;
730 
731     if (CODECHAL_AVC != m_standard)
732     {
733         return CodechalEncodeCscDs::SetCurbeDS4x();
734     }
735 
736     Ds4xKernelCurbeData curbe;
737 
738     curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
739     curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
740 
741     curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
742     curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
743 
744     if (m_curbeParams.bFieldPicture)
745     {
746         curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
747         curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
748     }
749 
750     if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
751     {
752         curbe.DW5_FlatnessThreshold = 128;
753     }
754 
755     // For gen10 DS kernel, If Flatness Check enabled, need enable MBVariance as well. Otherwise will not output MbIsFlat.
756     curbe.DW6_EnableMBVarianceOutput = curbe.DW6_EnableMBFlatnessCheck || m_curbeParams.bMBVarianceOutputEnabled;
757     curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
758     curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
759 
760     if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
761     {
762         curbe.DW8_MBVProcStatsBTIFrame = ds4xDstMbVProc;
763 
764         if (m_curbeParams.bFieldPicture)
765         {
766             curbe.DW9_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
767         }
768     }
769 
770     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
771         &curbe,
772         m_dsKernelState->dwCurbeOffset,
773         sizeof(curbe)));
774 
775     CODECHAL_DEBUG_TOOL(
776         if (m_encoder->m_encodeParState)
777         {
778             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_encodeParState->PopulateDsParam(&curbe));
779         }
780     )
781 
782     return MOS_STATUS_SUCCESS;
783 }
784 
InitSfcState()785 MOS_STATUS CodechalEncodeCscDsG11::InitSfcState()
786 {
787     CODECHAL_ENCODE_FUNCTION_ENTER;
788 
789     if (!m_sfcState)
790     {
791         m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfcG11);
792         CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
793 
794         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
795 
796         m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
797     }
798     return MOS_STATUS_SUCCESS;
799 }
800 
CheckRawSurfaceAlignment(MOS_SURFACE surface)801 MOS_STATUS CodechalEncodeCscDsG11::CheckRawSurfaceAlignment(MOS_SURFACE surface)
802 {
803     if (m_cscEnableCopy && (surface.dwWidth % m_rawSurfAlignment || surface.dwHeight % m_rawSurfAlignment) &&
804         m_colorRawSurface != cscColorNv12TileY)
805     {
806         m_cscRequireCopy = 1;
807     }
808     return MOS_STATUS_SUCCESS;
809 }
810 
811 
CodechalEncodeCscDsG11(CodechalEncoderState * encoder)812 CodechalEncodeCscDsG11::CodechalEncodeCscDsG11(CodechalEncoderState* encoder)
813     : CodechalEncodeCscDs(encoder)
814 {
815     m_cscKernelUID = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
816     m_cscCurbeLength = sizeof(CscKernelCurbeData);
817 #if defined(ENABLE_KERNELS)
818     m_kernelBase = (uint8_t*)IGCODECKRN_G11;
819 #endif
820 }
821 
~CodechalEncodeCscDsG11()822 CodechalEncodeCscDsG11::~CodechalEncodeCscDsG11()
823 {
824     // free the MbStats surface
825     m_osInterface->pfnFreeResource(m_osInterface, &m_resMbStatsBuffer);
826 }
827