1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_csc_ds_g12.cpp
24 //! \brief This file implements the Csc+Ds feature for all codecs on Gen12 platform
25 //!
26
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_csc_ds_g12.h"
29 #include "codechal_encode_sfc_g12.h"
30 #include "codechal_kernel_header_g12.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
33 #include "igcodeckrn_g12.h"
34 #endif
35 #if USE_CODECHAL_DEBUG_TOOL
36 #include "codechal_debug_encode_par_g12.h"
37 #endif
38
GetBTCount() const39 uint8_t CodechalEncodeCscDsG12::GetBTCount() const
40 {
41 return (uint8_t)cscNumSurfaces;
42 }
43
AllocateSurfaceCsc()44 MOS_STATUS CodechalEncodeCscDsG12::AllocateSurfaceCsc()
45 {
46 CODECHAL_ENCODE_FUNCTION_ENTER;
47
48 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
49
50 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeCscDs::AllocateSurfaceCsc());
51
52 MEDIA_WA_TABLE* waTable = m_osInterface->pfnGetWaTable(m_osInterface);
53 uint32_t memType = (MEDIA_IS_WA(waTable, WaForceAllocateLML4)) ? MOS_MEMPOOL_DEVICEMEMORY : 0;
54
55 // allocate the MbStats surface
56 if (Mos_ResourceIsNull(&m_resMbStatsBuffer))
57 {
58 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
59 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
60 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
61 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
62 allocParamsForBufferLinear.Format = Format_Buffer;
63 uint32_t alignedWidth = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameWidth), 64);
64 uint32_t alignedHeight = MOS_ALIGN_CEIL(CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_encoder->m_oriFrameHeight), 64);
65 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_avcMbStatBufferSize =
66 MOS_ALIGN_CEIL((alignedWidth * alignedHeight << 6) , 1024);
67 allocParamsForBufferLinear.pBufName = "MB Statistics Buffer";
68 allocParamsForBufferLinear.dwMemType = memType;
69
70 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
71 m_osInterface,
72 &allocParamsForBufferLinear,
73 &m_resMbStatsBuffer), "Failed to allocate MB Statistics Buffer.");
74 }
75
76 return eStatus;
77 }
78
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)79 MOS_STATUS CodechalEncodeCscDsG12::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
80 {
81 CODECHAL_ENCODE_FUNCTION_ENTER;
82
83 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
84
85 // check input color format, and set target traverse thread space size
86 switch (format)
87 {
88 case Format_NV12:
89 m_colorRawSurface = cscColorNv12Linear;
90 m_cscRequireColor = 1;
91 break;
92 case Format_YUY2:
93 case Format_YUYV:
94 m_colorRawSurface = cscColorYUY2;
95 m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
96 m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
97 break;
98 case Format_A8R8G8B8:
99 case Format_X8R8G8B8:
100 m_colorRawSurface = cscColorARGB;
101 m_cscUsingSfc = IsSfcEnabled() ? 1 : 0;
102 m_cscRequireColor = 1;
103 //Use EU for better performance in big resolution cases
104 if (m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088
105 && !MEDIA_IS_WA(m_hwInterface->GetWaTable(), Wa_1409932735))
106 {
107 m_cscUsingSfc = 0;
108 }
109 break;
110 case Format_A8B8G8R8:
111 m_colorRawSurface = cscColorABGR;
112 m_cscRequireColor = 1;
113 break;
114 case Format_P010:
115 case Format_P016:
116 m_colorRawSurface = cscColorP010;
117 m_cscRequireConvTo8bPlanar = 1;
118 break;
119 case Format_Y210:
120 if (m_encoder->m_vdencEnabled && MEDIA_IS_WA(m_encoder->m_waTable, WaHEVCVDEncY210LinearInputNotSupported))
121 {
122 if (tileType == MOS_TILE_Y)
123 {
124 m_colorRawSurface = cscColorY210;
125 m_cscRequireConvTo8bPlanar = 1;
126 }
127 else
128 {
129 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format Y210 Linear not yet supported!");
130 eStatus = MOS_STATUS_PLATFORM_NOT_SUPPORTED;
131 }
132 }
133 else
134 {
135 m_colorRawSurface = cscColorY210;
136 m_cscRequireConvTo8bPlanar = 1;
137 }
138 break;
139 case Format_Y216:
140 m_colorRawSurface = cscColorY210;
141 m_cscRequireConvTo8bPlanar = 1;
142 break;
143 case Format_AYUV:
144 if (m_encoder->m_vdencEnabled)
145 {
146 m_colorRawSurface = cscColorAYUV;
147 m_cscRequireColor = 1;
148 break;
149 }
150 case Format_R10G10B10A2:
151 if (m_encoder->m_vdencEnabled)
152 {
153 m_colorRawSurface = cscColorARGB10;
154 break;
155 }
156 case Format_B10G10R10A2:
157 if (m_encoder->m_vdencEnabled)
158 {
159 m_colorRawSurface = cscColorABGR10;
160 break;
161 }
162 case Format_Y410:
163 if (m_encoder->m_vdencEnabled)
164 {
165 m_colorRawSurface = cscColorY410;
166 break;
167 }
168 case Format_YVYU:
169 if (m_encoder->m_vdencEnabled)
170 {
171 m_colorRawSurface = cscColorYUY2;
172 m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
173 m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
174 break;
175 }
176 case Format_P210:
177 // not supported yet so fall-thru to default
178 m_colorRawSurface = cscColorP210;
179 m_cscRequireConvTo8bPlanar = 1;
180 default:
181 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not yet supported!", format);
182 eStatus = MOS_STATUS_INVALID_PARAMETER;
183 break;
184 }
185
186 return eStatus;
187 }
188
InitKernelStateCsc()189 MOS_STATUS CodechalEncodeCscDsG12::InitKernelStateCsc()
190 {
191 CODECHAL_ENCODE_FUNCTION_ENTER;
192
193 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
194
195 CODECHAL_KERNEL_HEADER currKrnHeader;
196 auto kernelSize = m_combinedKernelSize;
197 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
198 m_kernelBase,
199 ENC_SCALING_CONVERSION,
200 0,
201 &currKrnHeader,
202 &kernelSize));
203
204 m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
205 m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
206 m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
207 m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
208 m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
209 m_cscKernelState->KernelParams.iIdCount = 1;
210 m_cscKernelState->KernelParams.iInlineDataLength = m_cscCurbeLength;
211 m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
212 m_cscKernelState->KernelParams.pBinary =
213 m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
214 m_cscKernelState->KernelParams.iSize = kernelSize;
215
216 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
217 m_cscKernelState->KernelParams.iBTCount,
218 &m_cscKernelState->dwSshSize,
219 &m_cscKernelState->dwBindingTableSize));
220
221 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
222 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
223
224 m_maxBtCount += MOS_ALIGN_CEIL(cscNumSurfaces,m_renderInterface->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
225
226 return eStatus;
227 }
228
SetKernelParamsCsc(KernelParams * params)229 MOS_STATUS CodechalEncodeCscDsG12::SetKernelParamsCsc(KernelParams* params)
230 {
231 CODECHAL_ENCODE_FUNCTION_ENTER;
232
233 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
234
235 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
236
237 m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
238
239 auto inputFrameWidth = m_encoder->m_frameWidth;
240 auto inputFrameHeight = m_encoder->m_frameHeight;
241 auto inputSurface = m_rawSurfaceToEnc;
242 auto output4xDsSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
243 auto output2xDsSurface = m_encoder->m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
244 auto mbStatsSurface = &m_resMbStatsBuffer;
245
246 m_curbeParams.bHevcEncHistorySum = false;
247 m_surfaceParamsCsc.hevcExtParams = nullptr;
248
249 if (dsDisabled == params->stageDsConversion)
250 {
251 m_curbeParams.bConvertFlag = m_cscFlag != 0;
252
253 if (m_2xScalingEnabled && m_scalingEnabled)
254 {
255 m_curbeParams.downscaleStage = dsStage2x4x;
256 m_currRefList->b4xScalingUsed =
257 m_currRefList->b2xScalingUsed = true;
258 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
259 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
260 }
261 else if (m_2xScalingEnabled)
262 {
263 m_curbeParams.downscaleStage = dsStage2x;
264 m_currRefList->b2xScalingUsed = true;
265 output4xDsSurface = nullptr;
266 mbStatsSurface = nullptr;
267 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
268 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
269 }
270 else if (m_scalingEnabled)
271 {
272 m_curbeParams.downscaleStage = dsStage4x;
273 m_currRefList->b4xScalingUsed = true;
274 output2xDsSurface = nullptr;
275 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
276 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
277 }
278 else
279 {
280 // do CSC only
281 m_curbeParams.downscaleStage = dsDisabled;
282 output4xDsSurface = nullptr;
283 output2xDsSurface = nullptr;
284 mbStatsSurface = nullptr;
285 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
286 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
287 }
288
289 // history sum to be enabled only for the 4x stage
290 if (params->hevcExtParams)
291 {
292 auto hevcExtParam = (HevcExtKernelParams*)params->hevcExtParams;
293 m_curbeParams.bUseLCU32 = hevcExtParam->bUseLCU32;
294 m_curbeParams.bHevcEncHistorySum = hevcExtParam->bHevcEncHistorySum;
295 m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
296 }
297 }
298 else
299 {
300 // do 16x/32x downscaling
301 m_curbeParams.bConvertFlag = false;
302 mbStatsSurface = nullptr;
303
304 if (dsStage16x == params->stageDsConversion)
305 {
306 m_currRefList->b16xScalingUsed = true;
307 m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
308 m_curbeParams.downscaleStage = dsStage16x;
309 inputFrameWidth = m_encoder->m_downscaledWidth4x << 2;
310 inputFrameHeight = m_encoder->m_downscaledHeight4x << 2;
311
312 inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
313 output4xDsSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
314 output2xDsSurface = nullptr;
315 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = false;
316 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = true;
317 }
318 else if (dsStage32x == params->stageDsConversion)
319 {
320 m_currRefList->b32xScalingUsed = true;
321 m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
322 m_curbeParams.downscaleStage = dsStage2x;
323 inputFrameWidth = m_encoder->m_downscaledWidth16x;
324 inputFrameHeight = m_encoder->m_downscaledHeight16x;
325
326 inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
327 output4xDsSurface = nullptr;
328 output2xDsSurface = m_encoder->m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
329 m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt = true;
330 m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt = false;
331 }
332 }
333
334 // setup Curbe
335 m_curbeParams.dwInputPictureWidth = inputFrameWidth;
336 m_curbeParams.dwInputPictureHeight = inputFrameHeight;
337 m_curbeParams.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
338 m_curbeParams.bMBVarianceOutputEnabled = m_mbStatsEnabled;
339 m_curbeParams.bMBPixelAverageOutputEnabled = m_mbStatsEnabled;
340 m_curbeParams.bCscOrCopyOnly = !m_scalingEnabled || params->cscOrCopyOnly;
341 m_curbeParams.inputColorSpace = params->inputColorSpace;
342
343 // setup surface states
344 m_surfaceParamsCsc.psInputSurface = inputSurface;
345 m_surfaceParamsCsc.psOutputCopiedSurface = m_curbeParams.bConvertFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
346 m_surfaceParamsCsc.psOutput4xDsSurface = output4xDsSurface;
347 m_surfaceParamsCsc.psOutput2xDsSurface = output2xDsSurface;
348 m_surfaceParamsCsc.presMBVProcStatsBuffer = mbStatsSurface;
349 m_surfaceParamsCsc.hevcExtParams = params->hevcExtParams;
350
351 if (dsStage16x == params->stageDsConversion)
352 {
353 // here to calculate the walker resolution, we need to use the input surface resolution.
354 // it is inputFrameWidth/height / 4 in 16xStage, becasue kernel internally will do this.
355 inputFrameWidth = inputFrameWidth >> 2;
356 inputFrameHeight = inputFrameHeight >> 2;
357 }
358
359 // setup walker param
360 m_walkerResolutionX = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameWidth) >> 3;
361 m_walkerResolutionY = CODECHAL_GET_4xDS_SIZE_32ALIGNED(inputFrameHeight) >> 3;
362
363 return eStatus;
364 }
365
SetCurbeCsc()366 MOS_STATUS CodechalEncodeCscDsG12::SetCurbeCsc()
367 {
368 CODECHAL_ENCODE_FUNCTION_ENTER;
369
370 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
371
372 CscKernelCurbeData curbe;
373
374 curbe.DW0_OutputBitDepthForChroma = m_curbeParams.ucEncBitDepthChroma;
375 curbe.DW0_OutputBitDepthForLuma = m_curbeParams.ucEncBitDepthLuma;
376 curbe.DW0_RoundingEnable = 1;
377
378 if (m_colorRawSurface == cscColorABGR || m_colorRawSurface == cscColorABGR10)
379 {
380 curbe.DW1_PictureFormat = (uint8_t)((m_colorRawSurface == cscColorABGR) ? cscColorARGB : cscColorARGB10); // Use cscColorARGB for ABGR CSC, just switch B and R coefficients
381 }
382 else
383 {
384 curbe.DW1_PictureFormat = (uint8_t)m_colorRawSurface;
385 }
386
387 curbe.DW1_ConvertFlag = m_curbeParams.bConvertFlag;
388 curbe.DW1_DownscaleStage = (uint8_t)m_curbeParams.downscaleStage;
389 curbe.DW1_MbStatisticsDumpFlag = (m_curbeParams.downscaleStage == dsStage4x || m_curbeParams.downscaleStage == dsStage2x4x);
390 curbe.DW1_YUY2ConversionFlag = (m_colorRawSurface == cscColorYUY2) && m_cscRequireColor;
391 curbe.DW1_HevcEncHistorySum = m_curbeParams.bHevcEncHistorySum;
392 curbe.DW1_LCUSize = m_curbeParams.bUseLCU32;
393
394 curbe.DW2_OriginalPicWidthInSamples = m_curbeParams.dwInputPictureWidth;
395 curbe.DW2_OriginalPicHeightInSamples = m_curbeParams.dwInputPictureHeight;
396
397 // RGB->YUV CSC coefficients
398 if (m_curbeParams.inputColorSpace == ECOLORSPACE_P709)
399 {
400 curbe.DW4_CSC_Coefficient_C0 = 0xFFCD;
401 curbe.DW5_CSC_Coefficient_C3 = 0x0080;
402 curbe.DW6_CSC_Coefficient_C4 = 0x004F;
403 curbe.DW7_CSC_Coefficient_C7 = 0x0010;
404 curbe.DW8_CSC_Coefficient_C8 = 0xFFD5;
405 curbe.DW9_CSC_Coefficient_C11 = 0x0080;
406 if (cscColorARGB == m_colorRawSurface || cscColorARGB10 == m_colorRawSurface)
407 {
408 curbe.DW4_CSC_Coefficient_C1 = 0xFFFB;
409 curbe.DW5_CSC_Coefficient_C2 = 0x0038;
410 curbe.DW6_CSC_Coefficient_C5 = 0x0008;
411 curbe.DW7_CSC_Coefficient_C6 = 0x0017;
412 curbe.DW8_CSC_Coefficient_C9 = 0x0038;
413 curbe.DW9_CSC_Coefficient_C10 = 0xFFF3;
414 }
415 else // cscColorABGR == m_colorRawSurface || cscColorABGR10 == m_colorRawSurface
416 {
417 curbe.DW4_CSC_Coefficient_C1 = 0x0038;
418 curbe.DW5_CSC_Coefficient_C2 = 0xFFFB;
419 curbe.DW6_CSC_Coefficient_C5 = 0x0017;
420 curbe.DW7_CSC_Coefficient_C6 = 0x0008;
421 curbe.DW8_CSC_Coefficient_C9 = 0xFFF3;
422 curbe.DW9_CSC_Coefficient_C10 = 0x0038;
423 }
424 }
425 else if (m_curbeParams.inputColorSpace == ECOLORSPACE_P601)
426 {
427 curbe.DW4_CSC_Coefficient_C0 = 0xFFD1;
428 curbe.DW5_CSC_Coefficient_C3 = 0x0080;
429 curbe.DW6_CSC_Coefficient_C4 = 0x0041;
430 curbe.DW7_CSC_Coefficient_C7 = 0x0010;
431 curbe.DW8_CSC_Coefficient_C8 = 0xFFDB;
432 curbe.DW9_CSC_Coefficient_C11 = 0x0080;
433 if (cscColorARGB == m_colorRawSurface || cscColorARGB10 == m_colorRawSurface)
434 {
435 curbe.DW4_CSC_Coefficient_C1 = 0xFFF7;
436 curbe.DW5_CSC_Coefficient_C2 = 0x0038;
437 curbe.DW6_CSC_Coefficient_C5 = 0x000D;
438 curbe.DW7_CSC_Coefficient_C6 = 0x0021;
439 curbe.DW8_CSC_Coefficient_C9 = 0x0038;
440 curbe.DW9_CSC_Coefficient_C10 = 0xFFED;
441 }
442 else // cscColorABGR == m_colorRawSurface || cscColorABGR10 == m_colorRawSurface
443 {
444 curbe.DW4_CSC_Coefficient_C1 = 0x0038;
445 curbe.DW5_CSC_Coefficient_C2 = 0xFFF7;
446 curbe.DW6_CSC_Coefficient_C5 = 0x0021;
447 curbe.DW7_CSC_Coefficient_C6 = 0x000D;
448 curbe.DW8_CSC_Coefficient_C9 = 0xFFED;
449 curbe.DW9_CSC_Coefficient_C10 = 0x0038;
450 }
451 }
452 else
453 {
454 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ARGB input color space = %d!", m_curbeParams.inputColorSpace);
455 return MOS_STATUS_INVALID_PARAMETER;
456 }
457
458 curbe.DW10_BTI_InputSurface = cscSrcYPlane;
459 curbe.DW11_BTI_Enc8BitSurface = cscDstConvYPlane;
460 curbe.DW12_BTI_4xDsSurface = cscDst4xDs;
461 curbe.DW13_BTI_MbStatsSurface = cscDstMbStats;
462 curbe.DW14_BTI_2xDsSurface = cscDst2xDs;
463 curbe.DW15_BTI_HistoryBuffer = cscDstHistBuffer;
464 curbe.DW16_BTI_HistorySumBuffer = cscDstHistSum;
465 curbe.DW17_BTI_MultiTaskBuffer = cscDstMultiTask;
466
467 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
468 &curbe,
469 m_cscKernelState->dwCurbeOffset,
470 sizeof(curbe)));
471
472 return eStatus;
473 }
474
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)475 MOS_STATUS CodechalEncodeCscDsG12::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
476 {
477 CODECHAL_ENCODE_FUNCTION_ENTER;
478
479 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
480
481 // PAK input surface (could be 10-bit)
482 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
483 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
484 surfaceParams.bIs2DSurface = true;
485 surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
486 cscColorP010 == m_colorRawSurface ||
487 cscColorP210 == m_colorRawSurface ||
488 cscColorNv12Linear == m_colorRawSurface);
489 surfaceParams.bMediaBlockRW = true;
490
491 // Configure to R16/32 for input surface
492 if (m_surfaceParamsCsc.bScalingInUses16UnormSurfFmt)
493 {
494 // 32x scaling requires R16_UNROM
495 surfaceParams.bUse16UnormSurfaceFormat = true;
496 }
497 else if (m_surfaceParamsCsc.bScalingInUses32UnormSurfFmt)
498 {
499 surfaceParams.bUse32UnormSurfaceFormat = true;
500 }
501 else
502 {
503 /*
504 * Unify surface format to avoid mismatches introduced by DS kernel between MMC on and off cases.
505 * bUseCommonKernel | FormatIsNV12 | MmcdOn | SurfaceFormatToUse
506 * 1 | 1 | 0/1 | R8
507 * 1 | 0 | 0/1 | R16
508 * 0 | 1 | 0/1 | R8
509 * 0 | 0 | 1 | R8
510 * 0 | 0 | 0 | R32
511 */
512 surfaceParams.bUse16UnormSurfaceFormat = !(cscColorNv12TileY == m_colorRawSurface ||
513 cscColorNv12Linear == m_colorRawSurface);
514 }
515
516 if (m_encoder->m_vdencEnabled && (CODECHAL_HEVC == m_standard || CODECHAL_AVC == m_standard))
517 {
518 surfaceParams.bCheckCSC8Format= true;
519 }
520
521 surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
522 if (cscColorNv12Linear == m_colorRawSurface)
523 {
524 surfaceParams.dwHeightInUse = (surfaceParams.psSurface->dwHeight * 3) / 2;
525 }
526 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
527 MOS_CODEC_RESOURCE_USAGE_ORIGINAL_UNCOMPRESSED_PICTURE_ENCODE,
528 (codechalL3 | codechalLLC));
529
530 #ifdef _MMC_SUPPORTED
531 CODECHAL_ENCODE_CHK_NULL_RETURN(m_encoder->m_mmcState);
532 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_mmcState->SetSurfaceParams(&surfaceParams));
533
534 // disable compression for render RC TA resources
535 if (surfaceParams.psSurface->MmcState == MOS_MEMCOMP_RC &&
536 surfaceParams.psSurface->OsResource.pGmmResInfo->GetArraySize() > 1)
537 {
538 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnDecompResource(m_osInterface, &surfaceParams.psSurface->OsResource));
539 surfaceParams.psSurface->MmcState = MOS_MEMCOMP_DISABLED;
540 }
541 #endif
542
543 surfaceParams.dwBindingTableOffset = cscSrcYPlane;
544 surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
545 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
546 m_hwInterface,
547 cmdBuffer,
548 &surfaceParams,
549 m_cscKernelState));
550
551 // Converted NV12 output surface, or ENC 8-bit output surface
552 if (m_surfaceParamsCsc.psOutputCopiedSurface)
553 {
554 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
555 surfaceParams.bIs2DSurface =
556 surfaceParams.bUseUVPlane =
557 surfaceParams.bMediaBlockRW =
558 surfaceParams.bIsWritable = true;
559 surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
560 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
561 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
562 codechalLLC);
563
564 surfaceParams.dwBindingTableOffset = cscDstConvYPlane;
565 surfaceParams.dwUVBindingTableOffset = cscDstConvUVlane;
566 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
567 m_hwInterface,
568 cmdBuffer,
569 &surfaceParams,
570 m_cscKernelState));
571 }
572
573 // 4x downscaled surface
574 if (m_surfaceParamsCsc.psOutput4xDsSurface)
575 {
576 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
577 surfaceParams.bIs2DSurface =
578 surfaceParams.bMediaBlockRW =
579 surfaceParams.bIsWritable = true;
580 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
581 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
582 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
583 codechalLLC);
584 surfaceParams.dwBindingTableOffset = cscDst4xDs;
585 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
586 m_hwInterface,
587 cmdBuffer,
588 &surfaceParams,
589 m_cscKernelState));
590 }
591
592 // MB Stats surface
593 if (m_surfaceParamsCsc.presMBVProcStatsBuffer)
594 {
595 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
596 surfaceParams.dwSize = m_hwInterface->m_avcMbStatBufferSize;
597 surfaceParams.bIsWritable = true;
598 surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
599 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
600 MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE,
601 codechalLLC);
602 surfaceParams.dwBindingTableOffset = cscDstMbStats;
603 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
604 m_hwInterface,
605 cmdBuffer,
606 &surfaceParams,
607 m_cscKernelState));
608 }
609
610 // 2x downscaled surface
611 if (m_surfaceParamsCsc.psOutput2xDsSurface)
612 {
613 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
614 surfaceParams.bIs2DSurface =
615 surfaceParams.bMediaBlockRW =
616 surfaceParams.bIsWritable = true;
617 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput2xDsSurface;
618 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
619 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
620 codechalLLC);
621 surfaceParams.dwBindingTableOffset = cscDst2xDs;
622 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
623 m_hwInterface,
624 cmdBuffer,
625 &surfaceParams,
626 m_cscKernelState));
627 }
628
629 if (m_surfaceParamsCsc.hevcExtParams)
630 {
631 auto hevcExtParams = (HevcExtKernelParams*)m_surfaceParamsCsc.hevcExtParams;
632
633 // History buffer
634 if (hevcExtParams->presHistoryBuffer)
635 {
636 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
637 surfaceParams.dwSize = hevcExtParams->dwSizeHistoryBuffer;
638 surfaceParams.dwOffset = hevcExtParams->dwOffsetHistoryBuffer;
639 surfaceParams.bIsWritable = true;
640 surfaceParams.presBuffer = hevcExtParams->presHistoryBuffer;
641 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
642 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
643 codechalLLC);
644 surfaceParams.dwBindingTableOffset = cscDstHistBuffer;
645 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
646 m_hwInterface,
647 cmdBuffer,
648 &surfaceParams,
649 m_cscKernelState));
650 }
651
652 // History sum output buffer
653 if (hevcExtParams->presHistorySumBuffer)
654 {
655 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
656 surfaceParams.dwSize = hevcExtParams->dwSizeHistorySumBuffer;
657 surfaceParams.dwOffset = hevcExtParams->dwOffsetHistorySumBuffer;
658 surfaceParams.bIsWritable = true;
659 surfaceParams.presBuffer = hevcExtParams->presHistorySumBuffer;
660 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
661 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
662 codechalLLC);
663 surfaceParams.dwBindingTableOffset = cscDstHistSum;
664 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
665 m_hwInterface,
666 cmdBuffer,
667 &surfaceParams,
668 m_cscKernelState));
669 }
670
671 // multi-thread task buffer
672 if (hevcExtParams->presMultiThreadTaskBuffer)
673 {
674 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
675 surfaceParams.dwSize = hevcExtParams->dwSizeMultiThreadTaskBuffer;
676 surfaceParams.dwOffset = hevcExtParams->dwOffsetMultiThreadTaskBuffer;
677 surfaceParams.bIsWritable = true;
678 surfaceParams.presBuffer = hevcExtParams->presMultiThreadTaskBuffer;
679 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
680 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
681 codechalLLC);
682 surfaceParams.dwBindingTableOffset = cscDstMultiTask;
683 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
684 m_hwInterface,
685 cmdBuffer,
686 &surfaceParams,
687 m_cscKernelState));
688 }
689 }
690
691 return eStatus;
692 }
693
InitKernelStateDS()694 MOS_STATUS CodechalEncodeCscDsG12::InitKernelStateDS()
695 {
696 CODECHAL_ENCODE_FUNCTION_ENTER;
697
698 m_dsBTCount[0] = ds4xNumSurfaces;
699 m_dsCurbeLength[0] =
700 m_dsInlineDataLength = sizeof(Ds4xKernelCurbeData);
701 m_dsBTISrcY = ds4xSrcYPlane;
702 m_dsBTIDstY = ds4xDstYPlane;
703 m_dsBTISrcYTopField = ds4xSrcYPlaneTopField;
704 m_dsBTIDstYTopField = ds4xDstYPlaneTopField;
705 m_dsBTISrcYBtmField = ds4xSrcYPlaneBtmField;
706 m_dsBTIDstYBtmField = ds4xDstYPlaneBtmField;
707 m_dsBTIDstMbVProc = ds4xDstMbVProc;
708 m_dsBTIDstMbVProcTopField = ds4xDstMbVProcTopField;
709 m_dsBTIDstMbVProcBtmField = ds4xDstMbVProcBtmField;
710
711 uint32_t kernelSize, numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
712 m_dsKernelBase = m_kernelBase;
713 CODECHAL_KERNEL_HEADER currKrnHeader;
714 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
715 {
716 kernelSize = m_combinedKernelSize;
717 m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
718
719 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
720 m_dsKernelBase,
721 ENC_SCALING4X,
722 krnStateIdx,
723 &currKrnHeader,
724 &kernelSize))
725
726 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
727 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
728 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
729 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
730 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
731 m_dsKernelState->KernelParams.iIdCount = 1;
732 m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
733
734 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
735 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
736 m_dsKernelState->KernelParams.iSize = kernelSize;
737 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
738 m_dsKernelState->KernelParams.iBTCount,
739 &m_dsKernelState->dwSshSize,
740 &m_dsKernelState->dwBindingTableSize));
741
742 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
743 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
744
745 if (m_32xMeSupported)
746 {
747 m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
748
749 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
750 m_dsKernelBase,
751 ENC_SCALING2X,
752 krnStateIdx,
753 &currKrnHeader,
754 &kernelSize))
755
756 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
757 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
758 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
759 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
760 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
761
762 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
763 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
764 m_dsKernelState->KernelParams.iSize = kernelSize;
765 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
766 m_dsKernelState->KernelParams.iBTCount,
767 &m_dsKernelState->dwSshSize,
768 &m_dsKernelState->dwBindingTableSize));
769
770 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
771 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
772 }
773
774 if (m_encoder->m_interlacedFieldDisabled)
775 {
776 m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
777
778 if (m_32xMeSupported)
779 {
780 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
781 }
782 }
783 }
784
785 return MOS_STATUS_SUCCESS;
786 }
787
SetCurbeDS4x()788 MOS_STATUS CodechalEncodeCscDsG12::SetCurbeDS4x()
789 {
790 CODECHAL_ENCODE_FUNCTION_ENTER;
791
792 if (CODECHAL_AVC != m_standard)
793 {
794 return CodechalEncodeCscDs::SetCurbeDS4x();
795 }
796
797 Ds4xKernelCurbeData curbe;
798
799 curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
800 curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
801
802 curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
803 curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
804
805 if (m_curbeParams.bFieldPicture)
806 {
807 curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
808 curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
809 }
810
811 if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
812 {
813 curbe.DW5_FlatnessThreshold = 128;
814 }
815
816 // For gen10 DS kernel, If Flatness Check enabled, need enable MBVariance as well. Otherwise will not output MbIsFlat.
817 curbe.DW6_EnableMBVarianceOutput = curbe.DW6_EnableMBFlatnessCheck || m_curbeParams.bMBVarianceOutputEnabled;
818 curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
819 curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
820
821 if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
822 {
823 curbe.DW8_MBVProcStatsBTIFrame = ds4xDstMbVProc;
824
825 if (m_curbeParams.bFieldPicture)
826 {
827 curbe.DW9_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
828 }
829 }
830
831 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
832 &curbe,
833 m_dsKernelState->dwCurbeOffset,
834 sizeof(curbe)));
835
836 CODECHAL_DEBUG_TOOL(
837 if (m_encoder->m_encodeParState)
838 {
839 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_encodeParState->PopulateDsParam(&curbe));
840 }
841 )
842
843 return MOS_STATUS_SUCCESS;
844 }
845
InitSfcState()846 MOS_STATUS CodechalEncodeCscDsG12::InitSfcState()
847 {
848 CODECHAL_ENCODE_FUNCTION_ENTER;
849
850 if (!m_sfcState)
851 {
852 m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfcG12);
853 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
854
855 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
856
857 m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
858 }
859 return MOS_STATUS_SUCCESS;
860 }
861
SurfaceNeedsExtraCopy()862 MOS_STATUS CodechalEncodeCscDsG12::SurfaceNeedsExtraCopy()
863 {
864 m_needsExtraCopy = true;
865 return MOS_STATUS_SUCCESS;
866 }
867
CheckRawSurfaceAlignment(MOS_SURFACE surface)868 MOS_STATUS CodechalEncodeCscDsG12::CheckRawSurfaceAlignment(MOS_SURFACE surface)
869 {
870 if (m_cscEnableCopy && m_needsExtraCopy)
871 {
872 if (surface.Format == Format_A8R8G8B8) // not touch NV12 logic.
873 {
874 m_colorRawSurface = cscColorARGB;
875 m_cscRequireCopy = 1;
876 }
877 }
878
879 if (m_cscEnableCopy && (surface.dwWidth % m_rawSurfAlignment || surface.dwHeight % m_rawSurfAlignment) &&
880 m_colorRawSurface != cscColorNv12TileY)
881 {
882 m_cscRequireCopy = 1;
883 }
884
885 return MOS_STATUS_SUCCESS;
886 }
887
CodechalEncodeCscDsG12(CodechalEncoderState * encoder)888 CodechalEncodeCscDsG12::CodechalEncodeCscDsG12(CodechalEncoderState* encoder)
889 : CodechalEncodeCscDs(encoder)
890 {
891 m_cscKernelUID = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
892 m_cscCurbeLength = sizeof(CscKernelCurbeData);
893 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
894 m_kernelBase = (uint8_t*)IGCODECKRN_G12;
895 #endif
896 }
897
~CodechalEncodeCscDsG12()898 CodechalEncodeCscDsG12::~CodechalEncodeCscDsG12()
899 {
900 // free the MbStats surface
901 m_osInterface->pfnFreeResource(m_osInterface, &m_resMbStatsBuffer);
902 }
903