1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_csc_ds_g11.cpp
24 //! \brief This file implements the Csc+Ds feature for all codecs on Gen11 platform
25 //!
26
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_sfc_g11.h"
29 #include "codechal_encode_csc_ds_g11.h"
30 #include "codechal_kernel_header_g11.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS)
33 #include "igcodeckrn_g11.h"
34 #endif
35 #if USE_CODECHAL_DEBUG_TOOL
36 #include "codechal_debug_encode_par_g11.h"
37 #endif
38
GetBTCount() const39 uint8_t CodechalEncodeCscDsG11::GetBTCount() const
40 {
41 return (uint8_t)cscNumSurfaces;
42 }
43
AllocateSurfaceCsc()44 MOS_STATUS CodechalEncodeCscDsG11::AllocateSurfaceCsc()
45 {
46 CODECHAL_ENCODE_FUNCTION_ENTER;
47
48 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
49
50 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeCscDs::AllocateSurfaceCsc());
51
52 // allocate the MbStats surface
53 if (Mos_ResourceIsNull(&m_resMbStatsBuffer))
54 {
55 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
56 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
57 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
58 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
59 allocParamsForBufferLinear.Format = Format_Buffer;
60 uint32_t alignedWidth = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameWidth), 64);
61 uint32_t alignedHeight = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameHeight), 64);
62 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize =
63 MOS_ALIGN_CEIL((alignedWidth * alignedHeight << 6) , 1024);
64 allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
65
66 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
67 m_osInterface,
68 &allocParamsForBufferLinear,
69 &m_resMbStatsBuffer), "Failed to allocate MB Statistics Buffer.");
70 }
71
72 return eStatus;
73 }
74
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)75 MOS_STATUS CodechalEncodeCscDsG11::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
76 {
77 CODECHAL_ENCODE_FUNCTION_ENTER;
78
79 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
80
81 // check input color format, and set target traverse thread space size
82 switch (format)
83 {
84 case Format_NV12:
85 m_colorRawSurface = cscColorNv12Linear;
86 m_cscRequireColor = 1;
87 break;
88 case Format_YUY2:
89 case Format_YUYV:
90 m_colorRawSurface = cscColorYUY2;
91 m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
92 m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
93 break;
94 case Format_A8R8G8B8:
95 m_colorRawSurface = cscColorARGB;
96 m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
97 m_cscRequireColor = 1;
98 //Use EU for better performance in big resolution cases
99 if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
100 {
101 m_cscUsingSfc = 0;
102 }
103 break;
104 case Format_A8B8G8R8:
105 m_colorRawSurface = cscColorABGR;
106 m_cscRequireColor = 1;
107 m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
108 // Use EU for better performance in big resolution cases or TU1
109 if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088)
110 {
111 m_cscUsingSfc = 0;
112 }
113 break;
114 case Format_P010:
115 m_colorRawSurface = cscColorP010;
116 m_cscRequireConvTo8bPlanar = 1;
117 break;
118 case Format_Y210:
119 m_colorRawSurface = cscColorY210;
120 if (m_encoder->m_vdencEnabled)
121 {
122 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format Y210 Linear or Tile X not yet supported!");
123 eStatus = MOS_STATUS_PLATFORM_NOT_SUPPORTED;
124 }
125 else
126 {
127 m_cscRequireConvTo8bPlanar = 1;
128 }
129 break;
130 case Format_AYUV:
131 if (m_encoder->m_vdencEnabled)
132 {
133 m_colorRawSurface = cscColorAYUV;
134 m_cscRequireColor = 1;
135 break;
136 }
137 case Format_P210:
138 // not supported yet so fall-thru to default
139 m_colorRawSurface = cscColorP210;
140 m_cscRequireConvTo8bPlanar = 1;
141 default:
142 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not yet supported!", format);
143 eStatus = MOS_STATUS_INVALID_PARAMETER;
144 break;
145 }
146
147 return eStatus;
148 }
149
InitKernelStateCsc()150 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateCsc()
151 {
152 CODECHAL_ENCODE_FUNCTION_ENTER;
153
154 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
155
156 CODECHAL_KERNEL_HEADER currKrnHeader;
157 auto kernelSize = m_combinedKernelSize;
158 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
159 m_kernelBase,
160 ENC_SCALING_CONVERSION,
161 0,
162 &currKrnHeader,
163 &kernelSize));
164
165 m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
166 m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
167 m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
168 m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
169 m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
170 m_cscKernelState->KernelParams.iIdCount = 1;
171 m_cscKernelState->KernelParams.iInlineDataLength = m_cscCurbeLength;
172 m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
173 m_cscKernelState->KernelParams.pBinary =
174 m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
175 m_cscKernelState->KernelParams.iSize = kernelSize;
176
177 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
178 m_cscKernelState->KernelParams.iBTCount,
179 &m_cscKernelState->dwSshSize,
180 &m_cscKernelState->dwBindingTableSize));
181
182 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
183 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
184
185 return eStatus;
186 }
187
SetKernelParamsCsc(KernelParams * params)188 MOS_STATUS CodechalEncodeCscDsG11::SetKernelParamsCsc(KernelParams* params)
189 {
190 CODECHAL_ENCODE_FUNCTION_ENTER;
191
192 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
193
194 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
195
196 m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
197
198 auto inputFrameWidth = m_encoder->m_frameWidth;
199 auto inputFrameHeight = m_encoder->m_frameHeight;
200 auto inputSurface = m_rawSurfaceToEnc;
201 auto output4xDsSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
202 auto output2xDsSurface = m_encoder->m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
203 auto mbStatsSurface = &m_resMbStatsBuffer;
204
205 m_curbeParams.bHevcEncHistorySum = false;
206 m_surfaceParamsCsc.hevcExtParams = nullptr;
207
208 if (dsDisabled == params->stageDsConversion)
209 {
210 m_curbeParams.bConvertFlag = m_cscFlag != 0;
211
212 if (m_2xScalingEnabled && m_scalingEnabled)
213 {
214 m_curbeParams.downscaleStage = dsStage2x4x;
215 m_currRefList->b4xScalingUsed =
216 m_currRefList->b2xScalingUsed = true;
217 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
218 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
219 }
220 else if (m_2xScalingEnabled)
221 {
222 m_curbeParams.downscaleStage = dsStage2x;
223 m_currRefList->b2xScalingUsed = true;
224 output4xDsSurface = nullptr;
225 mbStatsSurface = nullptr;
226 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
227 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
228 }
229 else if (m_scalingEnabled)
230 {
231 m_curbeParams.downscaleStage = dsStage4x;
232 m_currRefList->b4xScalingUsed = true;
233 output2xDsSurface = nullptr;
234 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
235 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
236 }
237 else
238 {
239 // do CSC only
240 m_curbeParams.downscaleStage = dsDisabled;
241 output4xDsSurface = nullptr;
242 output2xDsSurface = nullptr;
243 mbStatsSurface = nullptr;
244 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
245 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
246 }
247
248 // history sum to be enabled only for the 4x stage
249 if (params->hevcExtParams)
250 {
251 auto hevcExtParam = (HevcExtKernelParams*)params->hevcExtParams;
252 m_curbeParams.bUseLCU32 = hevcExtParam->bUseLCU32;
253 m_curbeParams.bHevcEncHistorySum = hevcExtParam->bHevcEncHistorySum;
254 m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
255 }
256 }
257 else
258 {
259 // do 16x/32x downscaling
260 inputFrameWidth = m_encoder->m_downscaledWidth4x;
261 inputFrameHeight = m_encoder->m_downscaledHeight4x;
262 m_curbeParams.bConvertFlag = false;
263 mbStatsSurface = nullptr;
264
265 if (dsStage16x == params->stageDsConversion)
266 {
267 m_currRefList->b16xScalingUsed = true;
268 m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
269 m_curbeParams.downscaleStage = dsStage16x;
270 inputFrameWidth = m_encoder->m_downscaledWidth4x << 2;
271 inputFrameHeight = m_encoder->m_downscaledHeight4x << 2;
272
273 inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
274 output4xDsSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
275 output2xDsSurface = nullptr;
276 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
277 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
278 }
279 else if (dsStage32x == params->stageDsConversion)
280 {
281 m_currRefList->b32xScalingUsed = true;
282 m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
283 m_curbeParams.downscaleStage = dsStage2x;
284 inputFrameWidth = m_encoder->m_downscaledWidth16x;
285 inputFrameHeight = m_encoder->m_downscaledHeight16x;
286 inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
287 output4xDsSurface = nullptr;
288 output2xDsSurface = m_encoder->m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
289 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
290 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
291 }
292 }
293
294 // setup Curbe
295 m_curbeParams.dwInputPictureWidth = inputFrameWidth;
296 m_curbeParams.dwInputPictureHeight = inputFrameHeight;
297 m_curbeParams.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
298 m_curbeParams.bMBVarianceOutputEnabled = m_mbStatsEnabled;
299 m_curbeParams.bMBPixelAverageOutputEnabled = m_mbStatsEnabled;
300 m_curbeParams.bCscOrCopyOnly = !m_scalingEnabled || params->cscOrCopyOnly;
301 m_curbeParams.inputColorSpace = params->inputColorSpace;
302
303 // setup surface states
304 m_surfaceParamsCsc.psInputSurface = inputSurface;
305 m_surfaceParamsCsc.psOutputCopiedSurface = m_curbeParams.bConvertFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
306 m_surfaceParamsCsc.psOutput4xDsSurface = output4xDsSurface;
307 m_surfaceParamsCsc.psOutput2xDsSurface = output2xDsSurface;
308 m_surfaceParamsCsc.presMBVProcStatsBuffer = mbStatsSurface;
309 m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
310
311 if (dsStage16x == params->stageDsConversion)
312 {
313 // here to calculate the walkder resolution, we need to use the input surface resolution.
314 // it is inputFrameWidth/height / 4 in 16xStage, becasue kernel internally will do this.
315 inputFrameWidth = inputFrameWidth >> 2;
316 inputFrameHeight = inputFrameHeight >> 2;
317 }
318
319 // setup walker param
320 m_walkerResolutionX = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameWidth) >> 3;
321 m_walkerResolutionY = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameHeight) >> 3;
322
323 return eStatus;
324 }
325
SetCurbeCsc()326 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeCsc()
327 {
328 CODECHAL_ENCODE_FUNCTION_ENTER;
329
330 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
331
332 CscKernelCurbeData curbe;
333
334 curbe.DW0_OutputBitDepthForChroma = m_curbeParams.ucEncBitDepthChroma;
335 curbe.DW0_OutputBitDepthForLuma = m_curbeParams.ucEncBitDepthLuma;
336 curbe.DW0_RoundingEnable = 1;
337
338 curbe.DW1_PictureFormat = (uint8_t)((m_colorRawSurface == cscColorABGR) ? cscColorARGB : m_colorRawSurface); // Use cscColorARGB for ABGR CSC, just switch B and R coefficients
339 curbe.DW1_ConvertFlag = m_curbeParams.bConvertFlag;
340 curbe.DW1_DownscaleStage = (uint8_t)m_curbeParams.downscaleStage;
341 curbe.DW1_MbStatisticsDumpFlag = (m_curbeParams.downscaleStage == dsStage4x || m_curbeParams.downscaleStage == dsStage2x4x);
342 curbe.DW1_YUY2ConversionFlag = (m_colorRawSurface == cscColorYUY2) && m_cscRequireColor;
343 curbe.DW1_HevcEncHistorySum = m_curbeParams.bHevcEncHistorySum;
344 curbe.DW1_LCUSize = m_curbeParams.bUseLCU32;
345
346 curbe.DW2_OriginalPicWidthInSamples = m_curbeParams.dwInputPictureWidth;
347 curbe.DW2_OriginalPicHeightInSamples = m_curbeParams.dwInputPictureHeight;
348
349 // RGB->YUV CSC coefficients
350 if (m_curbeParams.inputColorSpace == ECOLORSPACE_P709)
351 {
352 curbe.DW4_CSC_Coefficient_C0 = 0xFFCD;
353 curbe.DW5_CSC_Coefficient_C3 = 0x0080;
354 curbe.DW6_CSC_Coefficient_C4 = 0x004F;
355 curbe.DW7_CSC_Coefficient_C7 = 0x0010;
356 curbe.DW8_CSC_Coefficient_C8 = 0xFFD5;
357 curbe.DW9_CSC_Coefficient_C11 = 0x0080;
358 if (cscColorARGB == m_colorRawSurface)
359 {
360 curbe.DW4_CSC_Coefficient_C1 = 0xFFFB;
361 curbe.DW5_CSC_Coefficient_C2 = 0x0038;
362 curbe.DW6_CSC_Coefficient_C5 = 0x0008;
363 curbe.DW7_CSC_Coefficient_C6 = 0x0017;
364 curbe.DW8_CSC_Coefficient_C9 = 0x0038;
365 curbe.DW9_CSC_Coefficient_C10 = 0xFFF3;
366 }
367 else // cscColorABGR == m_colorRawSurface
368 {
369 curbe.DW4_CSC_Coefficient_C1 = 0x0038;
370 curbe.DW5_CSC_Coefficient_C2 = 0xFFFB;
371 curbe.DW6_CSC_Coefficient_C5 = 0x0017;
372 curbe.DW7_CSC_Coefficient_C6 = 0x0008;
373 curbe.DW8_CSC_Coefficient_C9 = 0xFFF3;
374 curbe.DW9_CSC_Coefficient_C10 = 0x0038;
375 }
376 }
377 else if (m_curbeParams.inputColorSpace == ECOLORSPACE_P601)
378 {
379 curbe.DW4_CSC_Coefficient_C0 = 0xFFD1;
380 curbe.DW5_CSC_Coefficient_C3 = 0x0080;
381 curbe.DW6_CSC_Coefficient_C4 = 0x0041;
382 curbe.DW7_CSC_Coefficient_C7 = 0x0010;
383 curbe.DW8_CSC_Coefficient_C8 = 0xFFDB;
384 curbe.DW9_CSC_Coefficient_C11 = 0x0080;
385 if (cscColorARGB == m_colorRawSurface)
386 {
387 curbe.DW4_CSC_Coefficient_C1 = 0xFFF7;
388 curbe.DW5_CSC_Coefficient_C2 = 0x0038;
389 curbe.DW6_CSC_Coefficient_C5 = 0x000D;
390 curbe.DW7_CSC_Coefficient_C6 = 0x0021;
391 curbe.DW8_CSC_Coefficient_C9 = 0x0038;
392 curbe.DW9_CSC_Coefficient_C10 = 0xFFED;
393 }
394 else // cscColorABGR == m_colorRawSurface
395 {
396 curbe.DW4_CSC_Coefficient_C1 = 0x0038;
397 curbe.DW5_CSC_Coefficient_C2 = 0xFFF7;
398 curbe.DW6_CSC_Coefficient_C5 = 0x0021;
399 curbe.DW7_CSC_Coefficient_C6 = 0x000D;
400 curbe.DW8_CSC_Coefficient_C9 = 0xFFED;
401 curbe.DW9_CSC_Coefficient_C10 = 0x0038;
402 }
403 }
404 else
405 {
406 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ARGB input color space = %d!", m_curbeParams.inputColorSpace);
407 return MOS_STATUS_INVALID_PARAMETER;
408 }
409
410 curbe.DW10_BTI_InputSurface = cscSrcYPlane;
411 curbe.DW11_BTI_Enc8BitSurface = cscDstConvYPlane;
412 curbe.DW12_BTI_4xDsSurface = cscDst4xDs;
413 curbe.DW13_BTI_MbStatsSurface = cscDstMbStats;
414 curbe.DW14_BTI_2xDsSurface = cscDst2xDs;
415 curbe.DW15_BTI_HistoryBuffer = cscDstHistBuffer;
416 curbe.DW16_BTI_HistorySumBuffer = cscDstHistSum;
417 curbe.DW17_BTI_MultiTaskBuffer = cscDstMultiTask;
418
419 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
420 &curbe,
421 m_cscKernelState->dwCurbeOffset,
422 sizeof(curbe)));
423
424 return eStatus;
425 }
426
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)427 MOS_STATUS CodechalEncodeCscDsG11::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
428 {
429 CODECHAL_ENCODE_FUNCTION_ENTER;
430
431 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
432
433 // PAK input surface (could be 10-bit)
434 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
435 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
436 surfaceParams.bIs2DSurface = true;
437 surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
438 cscColorP010 == m_colorRawSurface ||
439 cscColorP210 == m_colorRawSurface ||
440 cscColorNv12Linear == m_colorRawSurface);
441 surfaceParams.bMediaBlockRW = true;
442
443 // Configure to R16/32 for input surface
444 if (m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt)
445 {
446 // 32x scaling requires R16_UNROM
447 surfaceParams.bUse16UnormSurfaceFormat = true;
448 }
449 else if (m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt)
450 {
451 surfaceParams.bUse32UnormSurfaceFormat = true;
452 }
453 else
454 {
455 /*
456 * Unify surface format to avoid mismatches introduced by DS kernel between MMC on and off cases.
457 * bUseCommonKernel | FormatIsNV12 | MmcdOn | SurfaceFormatToUse
458 * 1 | 1 | 0/1 | R8
459 * 1 | 0 | 0/1 | R16
460 * 0 | 1 | 0/1 | R8
461 * 0 | 0 | 1 | R8
462 * 0 | 0 | 0 | R32
463 */
464 surfaceParams.bUse16UnormSurfaceFormat = !(cscColorNv12TileY == m_colorRawSurface ||
465 cscColorNv12Linear == m_colorRawSurface);
466 }
467
468 if (m_encoder->m_vdencEnabled && (CODECHAL_HEVC == m_standard || CODECHAL_AVC == m_standard))
469 {
470 surfaceParams.bCheckCSC8Format= true;
471 }
472
473 surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
474 if (cscColorNv12Linear == m_colorRawSurface)
475 {
476 surfaceParams.dwHeightInUse = (surfaceParams.psSurface->dwHeight * 3) / 2;
477 }
478 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
479 MOS_CODEC_RESOURCE_USAGE_ORIGINAL_UNCOMPRESSED_PICTURE_ENCODE,
480 (codechalL3 | codechalLLC));
481
482 surfaceParams.dwBindingTableOffset = cscSrcYPlane;
483 surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
484 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
485 m_hwInterface,
486 cmdBuffer,
487 &surfaceParams,
488 m_cscKernelState));
489
490 // Converted NV12 output surface, or ENC 8-bit output surface
491 if (m_surfaceParamsCsc.psOutputCopiedSurface)
492 {
493 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
494 surfaceParams.bIs2DSurface =
495 surfaceParams.bUseUVPlane =
496 surfaceParams.bMediaBlockRW =
497 surfaceParams.bIsWritable = true;
498 surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
499 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
500 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
501 codechalLLC);
502
503 surfaceParams.dwBindingTableOffset = cscDstConvYPlane;
504 surfaceParams.dwUVBindingTableOffset = cscDstConvUVlane;
505 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
506 m_hwInterface,
507 cmdBuffer,
508 &surfaceParams,
509 m_cscKernelState));
510 }
511
512 // 4x downscaled surface
513 if (m_surfaceParamsCsc.psOutput4xDsSurface)
514 {
515 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
516 surfaceParams.bIs2DSurface =
517 surfaceParams.bMediaBlockRW =
518 surfaceParams.bIsWritable = true;
519 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
520 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
521 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
522 codechalLLC);
523 surfaceParams.dwBindingTableOffset = cscDst4xDs;
524 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
525 m_hwInterface,
526 cmdBuffer,
527 &surfaceParams,
528 m_cscKernelState));
529 }
530
531 // MB Stats surface
532 if (m_surfaceParamsCsc.presMBVProcStatsBuffer)
533 {
534 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
535 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize);
536 surfaceParams.bIsWritable = true;
537 surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
538 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
539 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
540 codechalLLC);
541 surfaceParams.dwBindingTableOffset = cscDstMbStats;
542 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
543 m_hwInterface,
544 cmdBuffer,
545 &surfaceParams,
546 m_cscKernelState));
547 }
548
549 // 2x downscaled surface
550 if (m_surfaceParamsCsc.psOutput2xDsSurface)
551 {
552 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
553 surfaceParams.bIs2DSurface =
554 surfaceParams.bMediaBlockRW =
555 surfaceParams.bIsWritable = true;
556 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput2xDsSurface;
557 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
558 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
559 codechalLLC);
560 surfaceParams.dwBindingTableOffset = cscDst2xDs;
561 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
562 m_hwInterface,
563 cmdBuffer,
564 &surfaceParams,
565 m_cscKernelState));
566 }
567
568 if (m_surfaceParamsCsc.hevcExtParams)
569 {
570 auto hevcExtParams = (HevcExtKernelParams*)m_surfaceParamsCsc.hevcExtParams;
571
572 // History buffer
573 if (hevcExtParams->presHistoryBuffer)
574 {
575 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
576 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistoryBuffer);
577 surfaceParams.dwOffset = hevcExtParams->dwOffsetHistoryBuffer;
578 surfaceParams.bIsWritable = true;
579 surfaceParams.presBuffer = hevcExtParams->presHistoryBuffer;
580 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
581 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
582 codechalLLC);
583 surfaceParams.dwBindingTableOffset = cscDstHistBuffer;
584 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
585 m_hwInterface,
586 cmdBuffer,
587 &surfaceParams,
588 m_cscKernelState));
589 }
590
591 // History sum output buffer
592 if (hevcExtParams->presHistorySumBuffer)
593 {
594 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
595 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeHistorySumBuffer);
596 surfaceParams.dwOffset = hevcExtParams->dwOffsetHistorySumBuffer;
597 surfaceParams.bIsWritable = true;
598 surfaceParams.presBuffer = hevcExtParams->presHistorySumBuffer;
599 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
600 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
601 codechalLLC);
602 surfaceParams.dwBindingTableOffset = cscDstHistSum;
603 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
604 m_hwInterface,
605 cmdBuffer,
606 &surfaceParams,
607 m_cscKernelState));
608 }
609
610 // multi-thread task buffer
611 if (hevcExtParams->presMultiThreadTaskBuffer)
612 {
613 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
614 surfaceParams.dwSize = MOS_BYTES_TO_DWORDS(hevcExtParams->dwSizeMultiThreadTaskBuffer);
615 surfaceParams.dwOffset = hevcExtParams->dwOffsetMultiThreadTaskBuffer;
616 surfaceParams.bIsWritable = true;
617 surfaceParams.presBuffer = hevcExtParams->presMultiThreadTaskBuffer;
618 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
619 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
620 codechalLLC);
621 surfaceParams.dwBindingTableOffset = cscDstMultiTask;
622 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
623 m_hwInterface,
624 cmdBuffer,
625 &surfaceParams,
626 m_cscKernelState));
627 }
628 }
629
630 return eStatus;
631 }
632
InitKernelStateDS()633 MOS_STATUS CodechalEncodeCscDsG11::InitKernelStateDS()
634 {
635 CODECHAL_ENCODE_FUNCTION_ENTER;
636
637 m_dsBTCount[0] = ds4xNumSurfaces;
638 m_dsCurbeLength[0] =
639 m_dsInlineDataLength = sizeof(Ds4xKernelCurbeData);
640 m_dsBTISrcY = ds4xSrcYPlane;
641 m_dsBTIDstY = ds4xDstYPlane;
642 m_dsBTISrcYTopField = ds4xSrcYPlaneTopField;
643 m_dsBTIDstYTopField = ds4xDstYPlaneTopField;
644 m_dsBTISrcYBtmField = ds4xSrcYPlaneBtmField;
645 m_dsBTIDstYBtmField = ds4xDstYPlaneBtmField;
646 m_dsBTIDstMbVProc = ds4xDstMbVProc;
647 m_dsBTIDstMbVProcTopField = ds4xDstMbVProcTopField;
648 m_dsBTIDstMbVProcBtmField = ds4xDstMbVProcBtmField;
649
650 uint32_t kernelSize, numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
651 m_dsKernelBase = m_kernelBase;
652 CODECHAL_KERNEL_HEADER currKrnHeader;
653 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
654 {
655 kernelSize = m_combinedKernelSize;
656 m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
657
658 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
659 m_dsKernelBase,
660 ENC_SCALING4X,
661 krnStateIdx,
662 &currKrnHeader,
663 &kernelSize))
664
665 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
666 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
667 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
668 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
669 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
670 m_dsKernelState->KernelParams.iIdCount = 1;
671 m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
672
673 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
674 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
675 m_dsKernelState->KernelParams.iSize = kernelSize;
676 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
677 m_dsKernelState->KernelParams.iBTCount,
678 &m_dsKernelState->dwSshSize,
679 &m_dsKernelState->dwBindingTableSize));
680
681 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
682 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
683
684 if (m_32xMeSupported)
685 {
686 m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
687
688 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
689 m_dsKernelBase,
690 ENC_SCALING2X,
691 krnStateIdx,
692 &currKrnHeader,
693 &kernelSize))
694
695 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
696 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
697 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
698 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
699 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
700
701 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
702 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
703 m_dsKernelState->KernelParams.iSize = kernelSize;
704 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
705 m_dsKernelState->KernelParams.iBTCount,
706 &m_dsKernelState->dwSshSize,
707 &m_dsKernelState->dwBindingTableSize));
708
709 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
710 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
711 }
712
713 if (m_encoder->m_interlacedFieldDisabled)
714 {
715 m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
716
717 if (m_32xMeSupported)
718 {
719 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
720 }
721 }
722 }
723
724 return MOS_STATUS_SUCCESS;
725 }
726
SetCurbeDS4x()727 MOS_STATUS CodechalEncodeCscDsG11::SetCurbeDS4x()
728 {
729 CODECHAL_ENCODE_FUNCTION_ENTER;
730
731 if (CODECHAL_AVC != m_standard)
732 {
733 return CodechalEncodeCscDs::SetCurbeDS4x();
734 }
735
736 Ds4xKernelCurbeData curbe;
737
738 curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
739 curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
740
741 curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
742 curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
743
744 if (m_curbeParams.bFieldPicture)
745 {
746 curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
747 curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
748 }
749
750 if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
751 {
752 curbe.DW5_FlatnessThreshold = 128;
753 }
754
755 // For gen10 DS kernel, If Flatness Check enabled, need enable MBVariance as well. Otherwise will not output MbIsFlat.
756 curbe.DW6_EnableMBVarianceOutput = curbe.DW6_EnableMBFlatnessCheck || m_curbeParams.bMBVarianceOutputEnabled;
757 curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
758 curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
759
760 if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
761 {
762 curbe.DW8_MBVProcStatsBTIFrame = ds4xDstMbVProc;
763
764 if (m_curbeParams.bFieldPicture)
765 {
766 curbe.DW9_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
767 }
768 }
769
770 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
771 &curbe,
772 m_dsKernelState->dwCurbeOffset,
773 sizeof(curbe)));
774
775 CODECHAL_DEBUG_TOOL(
776 if (m_encoder->m_encodeParState)
777 {
778 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_encodeParState->PopulateDsParam(&curbe));
779 }
780 )
781
782 return MOS_STATUS_SUCCESS;
783 }
784
InitSfcState()785 MOS_STATUS CodechalEncodeCscDsG11::InitSfcState()
786 {
787 CODECHAL_ENCODE_FUNCTION_ENTER;
788
789 if (!m_sfcState)
790 {
791 m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfcG11);
792 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
793
794 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
795
796 m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
797 }
798 return MOS_STATUS_SUCCESS;
799 }
800
CheckRawSurfaceAlignment(MOS_SURFACE surface)801 MOS_STATUS CodechalEncodeCscDsG11::CheckRawSurfaceAlignment(MOS_SURFACE surface)
802 {
803 if (m_cscEnableCopy && (surface.dwWidth % m_rawSurfAlignment || surface.dwHeight % m_rawSurfAlignment) &&
804 m_colorRawSurface != cscColorNv12TileY)
805 {
806 m_cscRequireCopy = 1;
807 }
808 return MOS_STATUS_SUCCESS;
809 }
810
811
CodechalEncodeCscDsG11(CodechalEncoderState * encoder)812 CodechalEncodeCscDsG11::CodechalEncodeCscDsG11(CodechalEncoderState* encoder)
813 : CodechalEncodeCscDs(encoder)
814 {
815 m_cscKernelUID = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
816 m_cscCurbeLength = sizeof(CscKernelCurbeData);
817 #if defined(ENABLE_KERNELS)
818 m_kernelBase = (uint8_t*)IGCODECKRN_G11;
819 #endif
820 }
821
~CodechalEncodeCscDsG11()822 CodechalEncodeCscDsG11::~CodechalEncodeCscDsG11()
823 {
824 // free the MbStats surface
825 m_osInterface->pfnFreeResource(m_osInterface, &m_resMbStatsBuffer);
826 }
827