1 /*
2 * Copyright (c) 2017-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_csc_ds.cpp
24 //! \brief Defines base class for CSC and Downscaling
25 //!
26
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_csc_ds.h"
29 #include "hal_oca_interface.h"
30
AllocateSurfaceCsc()31 MOS_STATUS CodechalEncodeCscDs::AllocateSurfaceCsc()
32 {
33 CODECHAL_ENCODE_FUNCTION_ENTER;
34
35 if (!m_cscFlag)
36 {
37 return MOS_STATUS_SUCCESS;
38 }
39
40 return m_encoder->m_trackedBuf->AllocateSurfaceCsc();
41 }
42
AllocateSurfaceCopy(MOS_FORMAT format)43 MOS_STATUS CodechalEncodeCscDs::AllocateSurfaceCopy(MOS_FORMAT format)
44 {
45 CODECHAL_ENCODE_FUNCTION_ENTER;
46
47 if (!m_cscFlag)
48 {
49 return MOS_STATUS_SUCCESS;
50 }
51
52 return m_encoder->m_trackedBuf->AllocateSurfaceCopy(format, m_rawSurfaceToEnc->OsResource.pGmmResInfo->GetSetCpSurfTag(false, 0));
53 }
54
CheckRawColorFormat(MOS_FORMAT format,MOS_TILE_TYPE tileType)55 MOS_STATUS CodechalEncodeCscDs::CheckRawColorFormat(MOS_FORMAT format, MOS_TILE_TYPE tileType)
56 {
57 CODECHAL_ENCODE_FUNCTION_ENTER;
58
59 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
60
61 // check input color format, and set target traverse thread space size
62 switch (format)
63 {
64 case Format_NV12:
65 m_colorRawSurface = cscColorNv12Linear;
66 m_cscRequireColor = 1;
67 m_threadTraverseSizeX = 5; // for NV12, thread space is 32x4
68 break;
69 case Format_YUY2:
70 case Format_YUYV:
71 m_colorRawSurface = cscColorYUY2;
72 m_cscRequireColor = (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_outputChromaFormat;
73 m_cscRequireConvTo8bPlanar = (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat;
74 m_threadTraverseSizeX = 4; // for YUY2, thread space is 16x4
75 break;
76 case Format_A8R8G8B8:
77 m_colorRawSurface = cscColorARGB;
78 m_cscRequireColor = 1;
79 m_cscUsingSfc = m_cscEnableSfc ? 1 : 0;
80 // Use EU for better performance in big resolution cases or TU1
81 if ((m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088) || m_16xMeSupported)
82 {
83 m_cscUsingSfc = 0;
84 }
85 m_threadTraverseSizeX = 3; // for ARGB, thread space is 8x4
86 break;
87 case Format_A8B8G8R8:
88 m_colorRawSurface = cscColorABGR;
89 m_cscRequireColor = 1;
90 m_cscUsingSfc = m_cscEnableSfc ? 1 : 0;
91 // Use EU for better performance in big resolution cases or TU1
92 if ((m_cscRawSurfWidth * m_cscRawSurfHeight > 1920 * 1088) || m_16xMeSupported)
93 {
94 m_cscUsingSfc = 0;
95 }
96 m_threadTraverseSizeX = 3; // for ABGR, thread space is 8x4
97 break;
98 case Format_P010:
99 m_colorRawSurface = cscColorP010;
100 m_cscRequireConvTo8bPlanar = 1;
101 break;
102 default:
103 CODECHAL_ENCODE_ASSERTMESSAGE("Input color format = %d not supported!", format);
104 eStatus = MOS_STATUS_INVALID_PARAMETER;
105 break;
106 }
107
108 return eStatus;
109 }
110
InitSfcState()111 MOS_STATUS CodechalEncodeCscDs::InitSfcState()
112 {
113 CODECHAL_ENCODE_FUNCTION_ENTER;
114
115 if (!m_sfcState)
116 {
117 m_sfcState = (CodecHalEncodeSfc*)MOS_New(CodecHalEncodeSfc);
118 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sfcState);
119
120 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->Initialize(m_hwInterface, m_osInterface));
121
122 m_sfcState->SetInputColorSpace(MHW_CSpace_sRGB);
123 }
124
125 return MOS_STATUS_SUCCESS;
126 }
127
SetParamsSfc(CODECHAL_ENCODE_SFC_PARAMS * sfcParams)128 MOS_STATUS CodechalEncodeCscDs::SetParamsSfc(CODECHAL_ENCODE_SFC_PARAMS* sfcParams)
129 {
130 CODECHAL_ENCODE_FUNCTION_ENTER;
131
132 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
133
134 CODECHAL_ENCODE_CHK_NULL_RETURN(sfcParams);
135
136 // color space parameters have been set to pSfcState already, no need set here
137 sfcParams->pInputSurface = m_rawSurfaceToEnc;
138 sfcParams->pOutputSurface = m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER);
139 sfcParams->rcInputSurfaceRegion.X = 0;
140 sfcParams->rcInputSurfaceRegion.Y = 0;
141 sfcParams->rcInputSurfaceRegion.Width = m_cscRawSurfWidth;
142 sfcParams->rcInputSurfaceRegion.Height = m_cscRawSurfHeight;
143
144 sfcParams->rcOutputSurfaceRegion.X = 0;
145 sfcParams->rcOutputSurfaceRegion.Y = 0;
146 sfcParams->rcOutputSurfaceRegion.Width = m_cscRawSurfWidth;
147 sfcParams->rcOutputSurfaceRegion.Height = m_cscRawSurfHeight;
148
149 sfcParams->uiChromaSitingType = MHW_CHROMA_SITING_HORZ_CENTER | MHW_CHROMA_SITING_VERT_CENTER;
150
151 return eStatus;
152 }
153
InitKernelStateCsc()154 MOS_STATUS CodechalEncodeCscDs::InitKernelStateCsc()
155 {
156 CODECHAL_ENCODE_FUNCTION_ENTER;
157
158 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
159
160 auto kernelHeaderTable = (CscKernelHeader*)m_kernelBase;
161 auto currKrnHeader = kernelHeaderTable->header;
162
163 m_cscKernelState->KernelParams.iBTCount = cscNumSurfaces;
164 m_cscKernelState->KernelParams.iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
165 m_cscKernelState->KernelParams.iCurbeLength = m_cscCurbeLength;
166 m_cscKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
167 m_cscKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
168 m_cscKernelState->KernelParams.iIdCount = 1;
169 m_cscKernelState->KernelParams.iInlineDataLength = 0;
170 m_cscKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
171 m_cscKernelState->KernelParams.pBinary =
172 m_kernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
173 m_cscKernelState->KernelParams.iSize = m_combinedKernelSize - (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
174
175 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
176 m_cscKernelState->KernelParams.iBTCount,
177 &m_cscKernelState->dwSshSize,
178 &m_cscKernelState->dwBindingTableSize));
179
180 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
181 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_cscKernelState));
182
183 return eStatus;
184 }
185
SetKernelParamsCsc(KernelParams * params)186 MOS_STATUS CodechalEncodeCscDs::SetKernelParamsCsc(KernelParams* params)
187 {
188 CODECHAL_ENCODE_FUNCTION_ENTER;
189
190 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
191
192 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
193
194 /* calling mode for Ds+Copy kernel and/or 4x DS kernel
195 *
196 * For Progressive:
197 * ------------------------------------------------------------------------------------------------
198 * bScalingEnabled cscReqdForRawSurf bFirstField call Ds+Copy kernel? call 4x DS kernel?
199 * ------------------------------------------------------------------------------------------------
200 * 1 0 1 Yes
201 * 1 1 1 COPY_DS mode
202 * 0 0 1
203 * 0 1 1 COPY_ONLY mode
204 *
205 * For Interlaced:
206 * 1 0 1 Yes
207 * 1 1 1 COPY_ONLY mode Yes, see note 2
208 * 0 0 dont care
209 * 0 1 1 COPY_ONLY mode
210 * 0 1 0 do nothing for 2nd field
211 *
212 * Note 1: bFirstField must be == 1 when (1) bScalingEnabled == 1, or (2) Progressive content
213 * Note 2: so far Ds+Copy kernel does not support Interlaced, so we would have to do a COPY_ONLY, followed by 4x DS
214 * these 2 steps can combine into a single COPY_DS once Interlaced is supported
215 */
216
217 m_lastTaskInPhase = params->bLastTaskInPhaseCSC;
218 m_currRefList->b4xScalingUsed = m_scalingEnabled;
219
220 // setup Curbe
221 m_curbeParams.dwInputPictureWidth = m_cscRawSurfWidth;
222 m_curbeParams.dwInputPictureHeight = m_cscRawSurfHeight;
223 m_curbeParams.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
224 m_curbeParams.bMBVarianceOutputEnabled = m_mbStatsEnabled;
225 m_curbeParams.bMBPixelAverageOutputEnabled = m_mbStatsEnabled;
226 m_curbeParams.bCscOrCopyOnly = !m_scalingEnabled || params->cscOrCopyOnly;
227 m_curbeParams.inputColorSpace = params->inputColorSpace;
228
229 // setup surface states
230 m_surfaceParamsCsc.psInputSurface = m_rawSurfaceToEnc;
231 m_surfaceParamsCsc.psOutputCopiedSurface = m_cscFlag ? m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
232 m_surfaceParamsCsc.psOutput4xDsSurface =
233 !m_curbeParams.bCscOrCopyOnly ? m_encoder->m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) : nullptr;
234
235 if (m_mbStatsSupported)
236 {
237 m_surfaceParamsCsc.bMBVProcStatsEnabled = true;
238 m_surfaceParamsCsc.presMBVProcStatsBuffer = &m_resMbStatsBuffer;
239 }
240 else
241 {
242 m_surfaceParamsCsc.bFlatnessCheckEnabled = m_flatnessCheckEnabled;
243 m_surfaceParamsCsc.psFlatnessCheckSurface = &m_encoder->m_flatnessCheckSurface;
244 }
245
246 // setup walker param
247 m_walkerResolutionX = MOS_ROUNDUP_SHIFT(m_downscaledWidth4x, m_threadTraverseSizeX);
248 m_walkerResolutionY = MOS_ROUNDUP_SHIFT(m_downscaledHeight4x, m_threadTraverseSizeY);
249
250 return eStatus;
251 }
252
SetCurbeCsc()253 MOS_STATUS CodechalEncodeCscDs::SetCurbeCsc()
254 {
255 CODECHAL_ENCODE_FUNCTION_ENTER;
256
257 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
258
259 CscKernelCurbeData curbe;
260
261 curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
262 curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
263
264 curbe.DW1_SrcNV12SurfYIndex = cscSrcYPlane;
265 curbe.DW2_DstYSurfIndex = cscDstDsYPlane;
266 curbe.DW3_FlatDstSurfIndex = cscDstFlatOrMbStats;
267 curbe.DW4_CopyDstNV12SurfIndex = cscDstCopyYPlane;
268
269 if (m_curbeParams.bCscOrCopyOnly)
270 {
271 curbe.DW5_CscDsCopyOpCode = 0; // Copy only
272 }
273 else
274 {
275 // Enable DS kernel (0 disable, 1 enable)
276 curbe.DW5_CscDsCopyOpCode = 1; // 0x01 to 0x7F: DS + Copy
277 }
278
279 if (cscColorNv12TileY == m_colorRawSurface ||
280 cscColorNv12Linear == m_colorRawSurface)
281 {
282 curbe.DW5_InputColorFormat = 0;
283 }
284 else if (cscColorYUY2 == m_colorRawSurface)
285 {
286 curbe.DW5_InputColorFormat = 1;
287 }
288 else if (cscColorARGB == m_colorRawSurface)
289 {
290 curbe.DW5_InputColorFormat = 2;
291 }
292
293 if (m_curbeParams.bFlatnessCheckEnabled ||
294 m_curbeParams.bMBVarianceOutputEnabled ||
295 m_curbeParams.bMBPixelAverageOutputEnabled)
296 {
297 curbe.DW6_FlatnessThreshold = 128;
298 curbe.DW7_EnableMBFlatnessCheck = true;
299 }
300 else
301 {
302 curbe.DW7_EnableMBFlatnessCheck = false;
303 }
304
305 curbe.DW8_SrcNV12SurfUVIndex = cscSrcUVPlane;
306
307 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscKernelState->m_dshRegion.AddData(
308 &curbe,
309 m_cscKernelState->dwCurbeOffset,
310 sizeof(curbe)));
311
312 return eStatus;
313 }
314
SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)315 MOS_STATUS CodechalEncodeCscDs::SendSurfaceCsc(PMOS_COMMAND_BUFFER cmdBuffer)
316 {
317 CODECHAL_ENCODE_FUNCTION_ENTER;
318
319 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
320
321 // Source surface/s
322 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
323 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
324 surfaceParams.bIs2DSurface = true; // linear surface is not 2D -> changed kernel
325 surfaceParams.bUseUVPlane = (cscColorNv12TileY == m_colorRawSurface ||
326 cscColorNv12Linear == m_colorRawSurface);
327 surfaceParams.bMediaBlockRW = true;
328 surfaceParams.psSurface = m_surfaceParamsCsc.psInputSurface;
329 surfaceParams.bUseARGB8Format = true;
330 surfaceParams.dwCacheabilityControl =
331 m_hwInterface->ComposeSurfaceCacheabilityControl(
332 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
333 (codechalL3 | codechalLLC));
334
335 surfaceParams.dwVerticalLineStride = m_verticalLineStride;
336 surfaceParams.dwBindingTableOffset = cscSrcYPlane;
337 surfaceParams.dwUVBindingTableOffset = cscSrcUVPlane;
338 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
339 m_hwInterface,
340 cmdBuffer,
341 &surfaceParams,
342 m_cscKernelState));
343
344 // Destination surface/s - 4x downscaled surface
345 if (m_surfaceParamsCsc.psOutput4xDsSurface)
346 {
347 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
348 surfaceParams.bIs2DSurface =
349 surfaceParams.bIsWritable = true;
350 surfaceParams.psSurface = m_surfaceParamsCsc.psOutput4xDsSurface;
351 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
352 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
353 codechalLLC);
354 surfaceParams.dwVerticalLineStride = m_verticalLineStride;
355 surfaceParams.dwBindingTableOffset = cscDstDsYPlane;
356 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
357 m_hwInterface,
358 cmdBuffer,
359 &surfaceParams,
360 m_cscKernelState));
361 }
362
363 // FlatnessCheck or MbStats surface
364 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
365 if (m_surfaceParamsCsc.bMBVProcStatsEnabled)
366 {
367 surfaceParams.bRawSurface =
368 surfaceParams.bIsWritable = true;
369 surfaceParams.dwSize = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_surfaceParamsCsc.psInputSurface->dwWidth) *
370 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_surfaceParamsCsc.psInputSurface->dwHeight) * 16 * sizeof(uint32_t);
371 surfaceParams.presBuffer = m_surfaceParamsCsc.presMBVProcStatsBuffer;
372 surfaceParams.dwCacheabilityControl =
373 m_hwInterface->ComposeSurfaceCacheabilityControl(
374 MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE,
375 codechalLLC | codechalL3);
376 surfaceParams.dwBindingTableOffset = cscDstFlatOrMbStats;
377 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
378 m_hwInterface,
379 cmdBuffer,
380 &surfaceParams,
381 m_cscKernelState));
382 }
383 else if (m_surfaceParamsCsc.bFlatnessCheckEnabled)
384 {
385 surfaceParams.bIs2DSurface =
386 surfaceParams.bMediaBlockRW =
387 surfaceParams.bIsWritable = true;
388 surfaceParams.psSurface = m_surfaceParamsCsc.psFlatnessCheckSurface;
389 surfaceParams.dwCacheabilityControl =
390 m_hwInterface->ComposeSurfaceCacheabilityControl(
391 MOS_CODEC_RESOURCE_USAGE_SURFACE_FLATNESS_CHECK_ENCODE,
392 codechalLLC | codechalL3);
393 surfaceParams.dwBindingTableOffset = cscDstFlatOrMbStats;
394 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
395 m_hwInterface,
396 cmdBuffer,
397 &surfaceParams,
398 m_cscKernelState));
399 }
400
401 // copy kernel output luma + chroma
402 if (m_surfaceParamsCsc.psOutputCopiedSurface)
403 {
404 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
405 surfaceParams.bIs2DSurface =
406 surfaceParams.bUseUVPlane =
407 surfaceParams.bMediaBlockRW =
408 surfaceParams.bIsWritable = true;
409 surfaceParams.psSurface = m_surfaceParamsCsc.psOutputCopiedSurface;
410 surfaceParams.dwCacheabilityControl = m_hwInterface->ComposeSurfaceCacheabilityControl(
411 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
412 codechalLLC);
413 surfaceParams.dwBindingTableOffset = cscDstCopyYPlane;
414 surfaceParams.dwUVBindingTableOffset = cscDstCopyUVPlane;
415 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
416 m_hwInterface,
417 cmdBuffer,
418 &surfaceParams,
419 m_cscKernelState));
420 }
421
422 return eStatus;
423 }
424
SetSurfacesToEncPak()425 MOS_STATUS CodechalEncodeCscDs::SetSurfacesToEncPak()
426 {
427 CODECHAL_ENCODE_FUNCTION_ENTER;
428
429 auto cscSurface = m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER);
430
431 // assign CSC output surface according to different operation
432 if (RenderConsumesCscSurface())
433 {
434 m_rawSurfaceToEnc = cscSurface;
435
436 // update the RawBuffer and RefBuffer (if Raw is used as Ref)
437 m_currRefList->sRefRawBuffer = *cscSurface;
438 if (m_useRawForRef)
439 {
440 m_currRefList->sRefBuffer = *cscSurface;
441 }
442 CODECHAL_ENCODE_NORMALMESSAGE("Set m_rawSurfaceToEnc %d x %d",
443 m_rawSurfaceToEnc->dwWidth, m_rawSurfaceToEnc->dwHeight);
444 }
445
446 if (VdboxConsumesCscSurface())
447 {
448 m_rawSurfaceToPak = cscSurface;
449 CODECHAL_ENCODE_NORMALMESSAGE("Set m_rawSurfaceToPak %d x %d",
450 m_rawSurfaceToPak->dwWidth, m_rawSurfaceToPak->dwHeight);
451 }
452
453 // dump copied surface from Ds+Copy kernel
454 if (m_cscFlag)
455 {
456 CODECHAL_DEBUG_TOOL(
457 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
458 cscSurface,
459 CodechalDbgAttr::attrEncodeRawInputSurface,
460 "Copied_SrcSurf")); // needs to consider YUV420
461 if (m_cscUsingSfc)
462 {
463 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->DumpBuffers(m_debugInterface));
464 }
465 )
466 }
467
468 return MOS_STATUS_SUCCESS;
469 }
470
InitKernelStateDS()471 MOS_STATUS CodechalEncodeCscDs::InitKernelStateDS()
472 {
473 CODECHAL_ENCODE_FUNCTION_ENTER;
474
475 uint32_t kernelSize, combinedKernelSize, numKernelsToLoad;
476
477 numKernelsToLoad = m_encoder->m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
478
479 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
480 m_encoder->m_kernelBase,
481 m_encoder->m_kuid,
482 &m_dsKernelBase,
483 &combinedKernelSize));
484
485 CODECHAL_KERNEL_HEADER currKrnHeader;
486 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
487 {
488 kernelSize = combinedKernelSize;
489
490 m_dsKernelState = &m_encoder->m_scaling4xKernelStates[krnStateIdx];
491
492 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->pfnGetKernelHeaderAndSize(
493 m_dsKernelBase,
494 ENC_SCALING4X,
495 krnStateIdx,
496 &currKrnHeader,
497 &kernelSize))
498
499 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[0];
500 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
501 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[0];
502 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
503 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
504 m_dsKernelState->KernelParams.iIdCount = 1;
505 m_dsKernelState->KernelParams.iInlineDataLength = m_dsInlineDataLength;
506
507 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
508 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
509 m_dsKernelState->KernelParams.iSize = kernelSize;
510 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
511 m_dsKernelState->KernelParams.iBTCount,
512 &m_dsKernelState->dwSshSize,
513 &m_dsKernelState->dwBindingTableSize));
514
515 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
516 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
517
518 if (m_32xMeSupported)
519 {
520 m_dsKernelState = &m_encoder->m_scaling2xKernelStates[krnStateIdx];
521
522 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->pfnGetKernelHeaderAndSize(
523 m_dsKernelBase,
524 ENC_SCALING2X,
525 krnStateIdx,
526 &currKrnHeader,
527 &kernelSize))
528
529 m_dsKernelState->KernelParams.iBTCount = m_dsBTCount[1];
530 m_dsKernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
531 m_dsKernelState->KernelParams.iCurbeLength = m_dsCurbeLength[1];
532 m_dsKernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
533 m_dsKernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
534
535 m_dsKernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
536 m_dsKernelState->KernelParams.pBinary = m_dsKernelBase + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
537 m_dsKernelState->KernelParams.iSize = kernelSize;
538 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
539 m_dsKernelState->KernelParams.iBTCount,
540 &m_dsKernelState->dwSshSize,
541 &m_dsKernelState->dwBindingTableSize));
542
543 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
544 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_dsKernelState));
545 }
546
547 if (m_encoder->m_interlacedFieldDisabled)
548 {
549 m_encoder->m_scaling4xKernelStates[1] = m_encoder->m_scaling4xKernelStates[0];
550
551 if (m_32xMeSupported)
552 {
553 m_encoder->m_scaling2xKernelStates[1] = m_encoder->m_scaling2xKernelStates[0];
554 }
555 }
556 }
557
558 return MOS_STATUS_SUCCESS;
559 }
560
SetCurbeDS4x()561 MOS_STATUS CodechalEncodeCscDs::SetCurbeDS4x()
562 {
563 CODECHAL_ENCODE_FUNCTION_ENTER;
564
565 Ds4xKernelCurbeData curbe;
566
567 curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
568 curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
569
570 curbe.DW1_InputYBTIFrame = ds4xSrcYPlane;
571 curbe.DW2_OutputYBTIFrame = ds4xDstYPlane;
572
573 if (m_curbeParams.bFieldPicture)
574 {
575 curbe.DW3_InputYBTIBottomField = ds4xSrcYPlaneBtmField;
576 curbe.DW4_OutputYBTIBottomField = ds4xDstYPlaneBtmField;
577 }
578
579 if ((curbe.DW6_EnableMBFlatnessCheck = m_curbeParams.bFlatnessCheckEnabled))
580 {
581 curbe.DW5_FlatnessThreshold = 128;
582 curbe.DW8_FlatnessOutputBTIFrame = ds4xDstFlatness;
583
584 if (m_curbeParams.bFieldPicture)
585 {
586 curbe.DW9_FlatnessOutputBTIBottomField = ds4xDstFlatnessBtmField;
587 }
588 }
589
590 curbe.DW6_EnableMBVarianceOutput = m_curbeParams.bMBVarianceOutputEnabled;
591 curbe.DW6_EnableMBPixelAverageOutput = m_curbeParams.bMBPixelAverageOutputEnabled;
592 curbe.DW6_EnableBlock8x8StatisticsOutput = m_curbeParams.bBlock8x8StatisticsEnabled;
593
594 if (curbe.DW6_EnableMBVarianceOutput || curbe.DW6_EnableMBPixelAverageOutput)
595 {
596 curbe.DW10_MBVProcStatsBTIFrame = ds4xDstMbVProc;
597
598 if (m_curbeParams.bFieldPicture)
599 {
600 curbe.DW11_MBVProcStatsBTIBottomField = ds4xDstMbVProcBtmField;
601 }
602 }
603
604 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
605 &curbe,
606 m_dsKernelState->dwCurbeOffset,
607 sizeof(curbe)));
608
609 return MOS_STATUS_SUCCESS;
610 }
611
SetCurbeDS2x()612 MOS_STATUS CodechalEncodeCscDs::SetCurbeDS2x()
613 {
614 CODECHAL_ENCODE_FUNCTION_ENTER;
615
616 Ds2xKernelCurbeData curbe;
617
618 curbe.DW0_InputPictureWidth = m_curbeParams.dwInputPictureWidth;
619 curbe.DW0_InputPictureHeight = m_curbeParams.dwInputPictureHeight;
620
621 curbe.DW8_InputYBTIFrame = ds2xSrcYPlane;
622 curbe.DW9_OutputYBTIFrame = ds2xDstYPlane;
623
624 if (m_curbeParams.bFieldPicture)
625 {
626 curbe.DW10_InputYBTIBottomField = ds2xSrcYPlaneBtmField;
627 curbe.DW11_OutputYBTIBottomField = ds2xDstYPlaneBtmField;
628 }
629
630 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dsKernelState->m_dshRegion.AddData(
631 &curbe,
632 m_dsKernelState->dwCurbeOffset,
633 sizeof(curbe)));
634
635 return MOS_STATUS_SUCCESS;
636 }
637
SetSurfaceParamsDS(KernelParams * params)638 MOS_STATUS CodechalEncodeCscDs::SetSurfaceParamsDS(KernelParams* params)
639 {
640 CODECHAL_ENCODE_FUNCTION_ENTER;
641
642 uint32_t scaleFactor, downscaledWidthInMb, downscaledHeightInMb;
643 uint32_t inputFrameWidth, inputFrameHeight, outputFrameWidth, outputFrameHeight;
644 uint32_t inputBottomFieldOffset, outputBottomFieldOffset;
645 PMOS_SURFACE inputSurface, outputSurface;
646 bool scaling4xInUse = !(params->b32xScalingInUse || params->b16xScalingInUse);
647 bool fieldPicture = CodecHal_PictureIsField(m_encoder->m_currOriginalPic);
648
649 if (params->b32xScalingInUse)
650 {
651 scaleFactor = SCALE_FACTOR_32x;
652 downscaledWidthInMb = m_downscaledWidth32x / CODECHAL_MACROBLOCK_WIDTH;
653 downscaledHeightInMb = m_downscaledHeight32x / CODECHAL_MACROBLOCK_HEIGHT;
654 if (fieldPicture)
655 {
656 downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
657 }
658
659 inputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
660 inputFrameWidth = m_downscaledWidth16x;
661 inputFrameHeight = m_downscaledHeight16x;
662 inputBottomFieldOffset = m_scaled16xBottomFieldOffset;
663
664 outputSurface = m_encoder->m_trackedBuf->Get32xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
665 outputFrameWidth = m_downscaledWidth32x;
666 outputFrameHeight = downscaledHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
667 outputBottomFieldOffset = m_scaled32xBottomFieldOffset;
668 m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
669 m_currRefList->b32xScalingUsed = true;
670 }
671 else if (params->b16xScalingInUse)
672 {
673 scaleFactor = SCALE_FACTOR_16x;
674 downscaledWidthInMb = m_downscaledWidth16x / CODECHAL_MACROBLOCK_WIDTH;
675 downscaledHeightInMb = m_downscaledHeight16x / CODECHAL_MACROBLOCK_HEIGHT;
676 if (fieldPicture)
677 {
678 downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
679 }
680
681 inputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
682 inputFrameWidth = m_downscaledWidth4x;
683 inputFrameHeight = m_downscaledHeight4x;
684 inputBottomFieldOffset = m_scaledBottomFieldOffset;
685
686 outputSurface = m_encoder->m_trackedBuf->Get16xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
687 outputFrameWidth = m_downscaledWidth16x;
688 outputFrameHeight = downscaledHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
689 outputBottomFieldOffset = m_scaled16xBottomFieldOffset;
690 m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
691 m_currRefList->b16xScalingUsed = true;
692 }
693 else
694 {
695 scaleFactor = SCALE_FACTOR_4x;
696 downscaledWidthInMb = m_downscaledWidth4x / CODECHAL_MACROBLOCK_WIDTH;
697 downscaledHeightInMb = m_downscaledHeight4x / CODECHAL_MACROBLOCK_HEIGHT;
698 if (fieldPicture)
699 {
700 downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
701 }
702
703 inputSurface = (params->bRawInputProvided) ? ¶ms->sInputRawSurface : m_rawSurfaceToEnc;
704 inputFrameWidth = m_encoder->m_oriFrameWidth;
705 inputFrameHeight = m_encoder->m_oriFrameHeight;
706 inputBottomFieldOffset = 0;
707
708 outputSurface = m_encoder->m_trackedBuf->Get4xDsSurface(m_encoder->m_currRefList->ucScalingIdx);
709 outputFrameWidth = m_downscaledWidth4x;
710 outputFrameHeight = downscaledHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
711 outputBottomFieldOffset = m_scaledBottomFieldOffset;
712 m_lastTaskInPhase = params->bLastTaskInPhase4xDS;
713 m_currRefList->b4xScalingUsed = true;
714 }
715
716 CODEC_PICTURE originalPic = (params->bRawInputProvided) ? params->inputPicture : m_encoder->m_currOriginalPic;
717 FeiPreEncParams *preEncParams = nullptr;
718 if (m_encoder->m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC)
719 {
720 preEncParams = (FeiPreEncParams*)m_encoder->m_encodeParams.pPreEncParams;
721 CODECHAL_ENCODE_CHK_NULL_RETURN(preEncParams);
722 }
723
724 // setup surface states
725 m_surfaceParamsDS.bCurrPicIsFrame = !CodecHal_PictureIsField(originalPic);
726 m_surfaceParamsDS.psInputSurface = inputSurface;
727 m_surfaceParamsDS.dwInputFrameWidth = inputFrameWidth;
728 m_surfaceParamsDS.dwInputFrameHeight = inputFrameHeight;
729 m_surfaceParamsDS.psOutputSurface = outputSurface;
730 m_surfaceParamsDS.dwOutputFrameWidth = outputFrameWidth;
731 m_surfaceParamsDS.dwOutputFrameHeight = outputFrameHeight;
732 m_surfaceParamsDS.dwInputBottomFieldOffset = (uint32_t)inputBottomFieldOffset;
733 m_surfaceParamsDS.dwOutputBottomFieldOffset = (uint32_t)outputBottomFieldOffset;
734 m_surfaceParamsDS.bScalingOutUses16UnormSurfFmt = params->b32xScalingInUse;
735 m_surfaceParamsDS.bScalingOutUses32UnormSurfFmt = !params->b32xScalingInUse;
736
737 if (preEncParams)
738 {
739 m_surfaceParamsDS.bPreEncInUse = true;
740 m_surfaceParamsDS.bEnable8x8Statistics = preEncParams->bEnable8x8Statistics;
741 if (params->bScalingforRef)
742 {
743 m_surfaceParamsDS.bMBVProcStatsEnabled = params->bStatsInputProvided;
744 m_surfaceParamsDS.presMBVProcStatsBuffer = (params->bStatsInputProvided) ? &(params->sInputStatsBuffer) : nullptr;
745 m_surfaceParamsDS.presMBVProcStatsBotFieldBuffer = (params->bStatsInputProvided) ? &(params->sInputStatsBotFieldBuffer) : nullptr;
746 }
747 else
748 {
749 m_surfaceParamsDS.bMBVProcStatsEnabled = !preEncParams->bDisableStatisticsOutput;
750 m_surfaceParamsDS.presMBVProcStatsBuffer = &(preEncParams->resStatsBuffer);
751 m_surfaceParamsDS.presMBVProcStatsBotFieldBuffer = &preEncParams->resStatsBotFieldBuffer;
752 }
753 m_surfaceParamsDS.dwMBVProcStatsBottomFieldOffset = m_mbVProcStatsBottomFieldOffset;
754 }
755 else if (m_mbStatsSupported)
756 {
757 //Currently Only Based on Flatness Check, later on Adaptive Transform Decision too
758 m_surfaceParamsDS.bMBVProcStatsEnabled = scaling4xInUse && (m_flatnessCheckEnabled || m_mbStatsEnabled);
759 m_surfaceParamsDS.presMBVProcStatsBuffer = &m_resMbStatsBuffer;
760 m_surfaceParamsDS.dwMBVProcStatsBottomFieldOffset = m_mbStatsBottomFieldOffset;
761
762 m_surfaceParamsDS.bFlatnessCheckEnabled = false; // Disabling flatness check as its encompassed in Mb stats
763 }
764 else
765 {
766 // Enable flatness check only for 4x scaling.
767 m_surfaceParamsDS.bFlatnessCheckEnabled = scaling4xInUse && m_flatnessCheckEnabled;
768 m_surfaceParamsDS.psFlatnessCheckSurface = &m_encoder->m_flatnessCheckSurface;
769 m_surfaceParamsDS.dwFlatnessCheckBottomFieldOffset = m_flatnessCheckBottomFieldOffset;
770 }
771
772 return MOS_STATUS_SUCCESS;
773 }
774
SendSurfaceDS(PMOS_COMMAND_BUFFER cmdBuffer)775 MOS_STATUS CodechalEncodeCscDs::SendSurfaceDS(PMOS_COMMAND_BUFFER cmdBuffer)
776 {
777 CODECHAL_ENCODE_FUNCTION_ENTER;
778
779 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
780
781 auto currPicIsFrame = m_surfaceParamsDS.bCurrPicIsFrame;
782
783 auto verticalLineStride = m_verticalLineStride;
784 auto verticalLineOffsetTop = CODECHAL_VLINESTRIDEOFFSET_TOP_FIELD;
785 auto verticalLineOffsetBottom = CODECHAL_VLINESTRIDEOFFSET_BOT_FIELD;
786
787 auto originalSurface = *m_surfaceParamsDS.psInputSurface;
788 originalSurface.dwWidth = m_surfaceParamsDS.dwInputFrameWidth;
789 originalSurface.dwHeight = m_surfaceParamsDS.dwInputFrameHeight;
790
791 // Use actual width and height for scaling source, not padded allocated dimensions
792 auto scaledSurface = m_surfaceParamsDS.psOutputSurface;
793 scaledSurface->dwWidth = m_surfaceParamsDS.dwOutputFrameWidth;
794 scaledSurface->dwHeight = m_surfaceParamsDS.dwOutputFrameHeight;
795
796 // Account for field case
797 if (!m_fieldScalingOutputInterleaved)
798 {
799 verticalLineStride = verticalLineOffsetTop = verticalLineOffsetBottom = 0;
800 originalSurface.dwHeight =
801 MOS_ALIGN_CEIL((currPicIsFrame) ? originalSurface.dwHeight : originalSurface.dwHeight / 2, 16);
802 scaledSurface->dwHeight =
803 MOS_ALIGN_CEIL((currPicIsFrame) ? scaledSurface->dwHeight : scaledSurface->dwHeight / 2, 16);
804 }
805 originalSurface.UPlaneOffset.iYOffset = originalSurface.dwHeight;
806
807 // Source surface/s
808 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
809 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
810 surfaceParams.bIs2DSurface = true;
811 surfaceParams.bMediaBlockRW = true;
812 if (m_surfaceParamsDS.bScalingOutUses16UnormSurfFmt)
813 {
814 // 32x scaling requires R16_UNROM
815 surfaceParams.bUse16UnormSurfaceFormat = true;
816 }
817 else
818 {
819 surfaceParams.bUse32UnormSurfaceFormat = true;
820 }
821 surfaceParams.psSurface = &originalSurface;
822 surfaceParams.dwCacheabilityControl =
823 m_hwInterface->ComposeSurfaceCacheabilityControl(
824 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE,
825 (codechalL3 | codechalLLC));
826 surfaceParams.dwVerticalLineStride = verticalLineStride;
827
828 CODECHAL_ENCODE_CHK_NULL_RETURN(m_encoder->m_mmcState);
829 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_mmcState->SetSurfaceParams(&surfaceParams));
830
831 if (currPicIsFrame)
832 {
833 // Frame
834 surfaceParams.dwBindingTableOffset = m_dsBTISrcY;
835 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
836 m_hwInterface,
837 cmdBuffer,
838 &surfaceParams,
839 m_dsKernelState));
840 }
841 else
842 {
843 // Top field
844 surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetTop;
845 surfaceParams.dwBindingTableOffset = m_dsBTISrcYTopField;
846 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
847 m_hwInterface,
848 cmdBuffer,
849 &surfaceParams,
850 m_dsKernelState));
851
852 // Bot field
853 surfaceParams.dwOffset = m_surfaceParamsDS.dwInputBottomFieldOffset;
854 surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetBottom;
855 surfaceParams.dwBindingTableOffset = m_dsBTISrcYBtmField;
856 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
857 m_hwInterface,
858 cmdBuffer,
859 &surfaceParams,
860 m_dsKernelState));
861 }
862
863 // Destination surface/s
864 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
865 surfaceParams.bIs2DSurface = true;
866 surfaceParams.bIsWritable = true;
867 surfaceParams.bRenderTarget = true;
868 surfaceParams.psSurface = scaledSurface;
869 if (m_surfaceParamsDS.bScalingOutUses32UnormSurfFmt)
870 {
871 surfaceParams.bMediaBlockRW = true;
872 surfaceParams.bUse32UnormSurfaceFormat = true;
873 }
874 else if (m_surfaceParamsDS.bScalingOutUses16UnormSurfFmt)
875 {
876 surfaceParams.bMediaBlockRW = true;
877 surfaceParams.bUse16UnormSurfaceFormat = true;
878 }
879 surfaceParams.dwCacheabilityControl =
880 m_hwInterface->ComposeSurfaceCacheabilityControl(
881 MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE_DST,
882 codechalLLC);
883
884 surfaceParams.dwVerticalLineStride = verticalLineStride;
885 surfaceParams.bRenderTarget = true;
886 surfaceParams.bIsWritable = true;
887
888 if (currPicIsFrame)
889 {
890 // Frame
891 surfaceParams.dwBindingTableOffset = m_dsBTIDstY;
892 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
893 m_hwInterface,
894 cmdBuffer,
895 &surfaceParams,
896 m_dsKernelState));
897 }
898 else
899 {
900 // Top field
901 surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetTop;
902 surfaceParams.dwBindingTableOffset = m_dsBTIDstYTopField;
903 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
904 m_hwInterface,
905 cmdBuffer,
906 &surfaceParams,
907 m_dsKernelState));
908
909 // Bot field
910 surfaceParams.dwOffset = m_surfaceParamsDS.dwOutputBottomFieldOffset;
911 surfaceParams.dwVerticalLineStrideOffset = verticalLineOffsetBottom;
912 surfaceParams.dwBindingTableOffset = m_dsBTIDstYBtmField;
913 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
914 m_hwInterface,
915 cmdBuffer,
916 &surfaceParams,
917 m_dsKernelState));
918 }
919
920 if (m_surfaceParamsDS.bFlatnessCheckEnabled)
921 {
922 // flatness check surface
923 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
924 surfaceParams.bIs2DSurface = true;
925 surfaceParams.psSurface = m_surfaceParamsDS.psFlatnessCheckSurface;
926 surfaceParams.dwCacheabilityControl =
927 m_hwInterface->ComposeSurfaceCacheabilityControl(
928 MOS_CODEC_RESOURCE_USAGE_SURFACE_FLATNESS_CHECK_ENCODE,
929 codechalL3 | codechalLLC);
930 surfaceParams.bMediaBlockRW = true;
931 surfaceParams.dwVerticalLineStride = 0;
932 surfaceParams.bRenderTarget = true;
933 surfaceParams.bIsWritable = true;
934
935 if (currPicIsFrame)
936 {
937 // Frame
938 surfaceParams.dwBindingTableOffset = m_dsBTIDstFlatness;
939 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
940 m_hwInterface,
941 cmdBuffer,
942 &surfaceParams,
943 m_dsKernelState));
944 }
945 else
946 {
947 // Top field
948 surfaceParams.bUseHalfHeight = true;
949 surfaceParams.dwVerticalLineStrideOffset = 0;
950 surfaceParams.dwBindingTableOffset = m_dsBTIDstFlatnessTopField;
951 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
952 m_hwInterface,
953 cmdBuffer,
954 &surfaceParams,
955 m_dsKernelState));
956
957 // Bot field
958 surfaceParams.dwOffset = m_surfaceParamsDS.dwFlatnessCheckBottomFieldOffset;
959 surfaceParams.dwVerticalLineStrideOffset = 0;
960 surfaceParams.dwBindingTableOffset = m_dsBTIDstFlatnessBtmField;
961 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
962 m_hwInterface,
963 cmdBuffer,
964 &surfaceParams,
965 m_dsKernelState));
966 }
967 }
968
969 if (m_surfaceParamsDS.bMBVProcStatsEnabled)
970 {
971 uint32_t size;
972 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
973 surfaceParams.presBuffer = m_surfaceParamsDS.presMBVProcStatsBuffer;
974 surfaceParams.dwCacheabilityControl =
975 m_hwInterface->ComposeSurfaceCacheabilityControl(
976 MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE,
977 codechalL3 | codechalLLC);
978 surfaceParams.bRenderTarget = true;
979 surfaceParams.bIsWritable = true;
980 surfaceParams.bRawSurface = true;
981
982 if (currPicIsFrame)
983 {
984 if (m_surfaceParamsDS.bPreEncInUse)
985 {
986 size = ((originalSurface.dwWidth + 15) / 16) * ((originalSurface.dwHeight + 15) / 16) * 16 * sizeof(uint32_t);
987 }
988 else
989 {
990 size = ((originalSurface.dwWidth + 15) / 16) * 16 * sizeof(uint32_t) * ((originalSurface.dwHeight + 15) / 16);
991 }
992 surfaceParams.dwSize = size;
993 surfaceParams.dwBindingTableOffset = m_dsBTIDstMbVProc;
994 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
995 m_hwInterface,
996 cmdBuffer,
997 &surfaceParams,
998 m_dsKernelState));
999 }
1000 else
1001 {
1002 if (m_surfaceParamsDS.bPreEncInUse)
1003 {
1004 size = ((originalSurface.dwWidth + 15) / 16) * ((originalSurface.dwHeight / 2 + 15) / 16) * 16 * sizeof(uint32_t);
1005 }
1006 else
1007 {
1008 size = ((originalSurface.dwWidth + 15) / 16) * 16 * sizeof(uint32_t) * ((originalSurface.dwHeight / 2 + 15) / 16);
1009 }
1010 surfaceParams.dwSize = size;
1011
1012 // Top field
1013 surfaceParams.dwBindingTableOffset = m_dsBTIDstMbVProcTopField;
1014 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1015 m_hwInterface,
1016 cmdBuffer,
1017 &surfaceParams,
1018 m_dsKernelState));
1019
1020 // Bot field
1021 if (m_surfaceParamsDS.bPreEncInUse)
1022 {
1023 surfaceParams.presBuffer = m_surfaceParamsDS.presMBVProcStatsBotFieldBuffer;
1024 }
1025 surfaceParams.dwOffset = m_surfaceParamsDS.dwMBVProcStatsBottomFieldOffset;
1026 surfaceParams.dwBindingTableOffset = m_dsBTIDstMbVProcBtmField;
1027 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1028 m_hwInterface,
1029 cmdBuffer,
1030 &surfaceParams,
1031 m_dsKernelState));
1032 }
1033 }
1034
1035 return eStatus;
1036 }
1037
GetBTCount() const1038 uint8_t CodechalEncodeCscDs::GetBTCount() const
1039 {
1040 return (uint8_t)cscNumSurfaces;
1041 }
1042
GetCscAllocation(uint32_t & width,uint32_t & height,MOS_FORMAT & format)1043 void CodechalEncodeCscDs::GetCscAllocation(uint32_t &width, uint32_t &height, MOS_FORMAT &format)
1044 {
1045 uint32_t surfaceWidth, surfaceHeight;
1046 if (m_mode == CODECHAL_ENCODE_MODE_HEVC)
1047 {
1048 // The raw input surface to HEVC Enc should be 32 aligned because of VME hardware restriction as mentioned in DDI.
1049 surfaceWidth = MOS_ALIGN_CEIL(m_encoder->m_oriFrameWidth, 32);
1050 surfaceHeight = MOS_ALIGN_CEIL(m_encoder->m_oriFrameHeight, 32);
1051 }
1052 else
1053 {
1054 surfaceWidth = MOS_ALIGN_CEIL(m_encoder->m_frameWidth, m_rawSurfAlignment);
1055 surfaceHeight = MOS_ALIGN_CEIL(m_encoder->m_frameHeight, m_rawSurfAlignment);
1056 }
1057
1058 if ( (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
1059 {
1060 //P208 is 422 8 bit planar with UV interleaved. It has the same memory layout as YUY2V
1061 format = Format_P208;
1062 width = surfaceWidth;
1063 height = surfaceHeight;
1064 }
1065 else
1066 {
1067 format = Format_NV12;
1068 width = surfaceWidth;
1069 height = surfaceHeight;
1070 }
1071 }
1072
Initialize()1073 MOS_STATUS CodechalEncodeCscDs::Initialize()
1074 {
1075 CODECHAL_ENCODE_FUNCTION_ENTER;
1076
1077 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1078
1079 if (m_cscKernelUID)
1080 {
1081 uint8_t* binary;
1082 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
1083 m_kernelBase,
1084 m_cscKernelUID,
1085 &binary,
1086 &m_combinedKernelSize));
1087
1088 CODECHAL_ENCODE_CHK_NULL_RETURN(m_kernelBase = binary);
1089
1090 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
1091 MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
1092 }
1093
1094 return eStatus;
1095 }
1096
CheckCondition()1097 MOS_STATUS CodechalEncodeCscDs::CheckCondition()
1098 {
1099 CODECHAL_ENCODE_FUNCTION_ENTER;
1100
1101 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1102
1103 MOS_SURFACE details;
1104 MOS_ZeroMemory(&details, sizeof(details));
1105 details.Format = Format_Invalid;
1106 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, &m_rawSurfaceToEnc->OsResource, &details));
1107
1108 auto cscFlagPrev = m_cscFlag;
1109 m_cscFlag = 0;
1110 // Source surface width/height for CSC must be set using the lowest value between
1111 // SequenceParametersSet width/height and real surface width/height
1112 m_cscRawSurfWidth = MOS_MIN(details.dwWidth, m_encoder->m_frameWidth);
1113 m_cscRawSurfHeight = MOS_MIN(details.dwHeight, m_encoder->m_frameHeight);
1114 m_colorRawSurface = cscColorNv12TileY; // by default assume NV12 Tile-Y format
1115 m_threadTraverseSizeX = 5;
1116 m_threadTraverseSizeY = 2; // for NV12, thread space is 32x4
1117
1118 // check raw surface's color/tile format
1119 if (!m_encoder->CheckSupportedFormat(&details))
1120 {
1121 CODECHAL_ENCODE_CHK_COND_RETURN(!m_cscEnableColor && !m_cscEnableSfc, "Input color format = %d is not supported!", details.Format);
1122 CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckRawColorFormat(details.Format, details.TileType));
1123 }
1124
1125 // check raw surface's alignment
1126 CODECHAL_ENCODE_CHK_STATUS_RETURN(CheckRawSurfaceAlignment(details));
1127
1128 // check raw surface's MMC state
1129 if (m_cscEnableMmc)
1130 {
1131 MOS_MEMCOMP_STATE mmcState;
1132
1133 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetMemoryCompressionMode(
1134 m_osInterface, &m_rawSurfaceToEnc->OsResource, &mmcState));
1135
1136 // Gen9 HEVC only: HCP on SKL does not support MMC surface, invoke Ds+Copy kernel to decompress MMC surface
1137 m_cscRequireMmc = (MOS_MEMCOMP_DISABLED != mmcState);
1138 }
1139
1140 // CSC no longer required, free existing CSC surface
1141 if (cscFlagPrev && !m_cscFlag)
1142 {
1143 m_encoder->m_trackedBuf->ResizeCsc();
1144 }
1145
1146 if (RequireCopyOnly())
1147 {
1148 CODECHAL_ENCODE_NORMALMESSAGE("raw surf = %d x %d, tile = %d, raw color format = %d, cscRequireCopy = %d",
1149 details.dwWidth,
1150 details.dwHeight,
1151 details.TileType,
1152 details.Format,
1153 m_cscRequireCopy);
1154 }
1155 else
1156 {
1157 CODECHAL_ENCODE_NORMALMESSAGE("raw surf = %d x %d, tile = %d, color = %d, cscFlag = %d",
1158 details.dwWidth,
1159 details.dwHeight,
1160 details.TileType,
1161 m_colorRawSurface,
1162 m_cscFlag);
1163 }
1164
1165 return eStatus;
1166 }
1167
CheckRawSurfaceAlignment(MOS_SURFACE surface)1168 MOS_STATUS CodechalEncodeCscDs::CheckRawSurfaceAlignment(MOS_SURFACE surface)
1169 {
1170 if (m_cscEnableCopy && (surface.dwWidth % m_rawSurfAlignment || surface.dwHeight % m_rawSurfAlignment))
1171 {
1172 m_cscRequireCopy = 1;
1173 }
1174 return MOS_STATUS_SUCCESS;
1175 }
1176
CheckReconSurfaceAlignment(PMOS_SURFACE surface)1177 MOS_STATUS CodechalEncodeCscDs::CheckReconSurfaceAlignment(PMOS_SURFACE surface)
1178 {
1179 CODECHAL_ENCODE_FUNCTION_ENTER;
1180
1181 uint8_t alignment;
1182 if (m_standard == CODECHAL_HEVC ||
1183 m_standard == CODECHAL_VP9)
1184 {
1185 alignment = m_hcpReconSurfAlignment;
1186 }
1187 else
1188 {
1189 alignment = m_mfxReconSurfAlignment;
1190 }
1191
1192 MOS_SURFACE resDetails;
1193 MOS_ZeroMemory(&resDetails, sizeof(resDetails));
1194 resDetails.Format = Format_Invalid;
1195 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, &surface->OsResource, &resDetails));
1196
1197 if (resDetails.dwHeight % alignment)
1198 {
1199 CODECHAL_ENCODE_ASSERTMESSAGE("Recon surface alignment does not meet HW requirement!");
1200 return MOS_STATUS_INVALID_PARAMETER;
1201 }
1202
1203 return MOS_STATUS_SUCCESS;
1204 }
1205
CheckRawSurfaceAlignment(PMOS_SURFACE surface)1206 MOS_STATUS CodechalEncodeCscDs::CheckRawSurfaceAlignment(PMOS_SURFACE surface)
1207 {
1208 CODECHAL_ENCODE_FUNCTION_ENTER;
1209
1210 MOS_SURFACE resDetails;
1211 MOS_ZeroMemory(&resDetails, sizeof(resDetails));
1212 resDetails.Format = Format_Invalid;
1213 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, &surface->OsResource, &resDetails));
1214
1215 if (resDetails.dwHeight % m_rawSurfAlignment)
1216 {
1217 CODECHAL_ENCODE_ASSERTMESSAGE("Raw surface alignment does not meet HW requirement!");
1218 return MOS_STATUS_INVALID_PARAMETER;
1219 }
1220
1221 return MOS_STATUS_SUCCESS;
1222 }
1223
SetHcpReconAlignment(uint8_t alignment)1224 void CodechalEncodeCscDs::SetHcpReconAlignment(uint8_t alignment)
1225 {
1226 m_hcpReconSurfAlignment = alignment;
1227 }
1228
WaitCscSurface(MOS_GPU_CONTEXT gpuContext,bool readOnly)1229 MOS_STATUS CodechalEncodeCscDs::WaitCscSurface(MOS_GPU_CONTEXT gpuContext, bool readOnly)
1230 {
1231 CODECHAL_ENCODE_FUNCTION_ENTER;
1232
1233 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1234
1235 auto syncParams = g_cInitSyncParams;
1236 syncParams.GpuContext = gpuContext;
1237 syncParams.bReadOnly = readOnly;
1238 syncParams.presSyncResource = &m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER)->OsResource;
1239
1240 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
1241 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
1242
1243 return eStatus;
1244 }
1245
KernelFunctions(KernelParams * params)1246 MOS_STATUS CodechalEncodeCscDs::KernelFunctions(
1247 KernelParams* params)
1248 {
1249 CODECHAL_ENCODE_FUNCTION_ENTER;
1250
1251 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1252
1253 bool useDsConvInCombinedKernel = m_useCommonKernel
1254 && !(CODECHAL_AVC == m_standard || CODECHAL_MPEG2 == m_standard || CODECHAL_VP8 == m_standard);
1255
1256 // call Ds+Copy
1257 if (m_cscFlag || useDsConvInCombinedKernel)
1258 {
1259 CODECHAL_ENCODE_CHK_STATUS_RETURN(CscKernel(params));
1260 }
1261
1262 // call 4x DS
1263 if (m_scalingEnabled && !m_currRefList->b4xScalingUsed)
1264 {
1265 params->b32xScalingInUse = false;
1266 params->b16xScalingInUse = false;
1267 CODECHAL_ENCODE_CHK_STATUS_RETURN(DsKernel(params));
1268 }
1269
1270 // call 16x/32x DS
1271 if (m_scalingEnabled && m_16xMeSupported)
1272 {
1273 //disable csc and reset colorFormat in 16x/32x stage since their inputs are 4x/16x DS results (only Y component)
1274 if(m_cscFlag && m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard)
1275 {
1276 m_colorRawSurface = cscColorNv12TileY;
1277 m_cscFlag = false;
1278 }
1279
1280 // 4x downscaled images used as the input for 16x downscaling
1281 if (useDsConvInCombinedKernel)
1282 {
1283 params->stageDsConversion = dsStage16x;
1284 CODECHAL_ENCODE_CHK_STATUS_RETURN(CscKernel(params));
1285 }
1286 else
1287 {
1288 params->b16xScalingInUse = true;
1289 CODECHAL_ENCODE_CHK_STATUS_RETURN(DsKernel(params));
1290 }
1291
1292 if (m_32xMeSupported)
1293 {
1294 // 16x downscaled images used as the input for 32x downscaling
1295 if (useDsConvInCombinedKernel)
1296 {
1297 params->stageDsConversion = dsStage32x;
1298 CODECHAL_ENCODE_CHK_STATUS_RETURN(CscKernel(params));
1299 }
1300 else
1301 {
1302 params->b32xScalingInUse = true;
1303 params->b16xScalingInUse = false;
1304 CODECHAL_ENCODE_CHK_STATUS_RETURN(DsKernel(params));
1305 }
1306 }
1307 }
1308
1309 return MOS_STATUS_SUCCESS;
1310 }
1311
CscUsingSfc(ENCODE_INPUT_COLORSPACE colorSpace)1312 MOS_STATUS CodechalEncodeCscDs::CscUsingSfc(ENCODE_INPUT_COLORSPACE colorSpace)
1313 {
1314 CODECHAL_ENCODE_FUNCTION_ENTER;
1315
1316 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1317
1318 // init SFC state
1319 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSfcState());
1320
1321 // wait for raw surface on VEBox context
1322 auto syncParams = g_cInitSyncParams;
1323 syncParams.GpuContext = MOS_GPU_CONTEXT_VEBOX;
1324 syncParams.presSyncResource = &m_rawSurfaceToEnc->OsResource;
1325 syncParams.bReadOnly = true;
1326 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
1327 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
1328
1329 // allocate CSC surface (existing surfaces will be re-used when associated frame goes out of RefList)
1330 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurfaceCsc());
1331
1332 if (m_encoder->m_trackedBuf->GetWaitCsc())
1333 {
1334 // on-demand sync for CSC surface re-use
1335 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitCscSurface(MOS_GPU_CONTEXT_VEBOX, false));
1336 }
1337
1338 CODECHAL_ENCODE_SFC_PARAMS sfcParams;
1339 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetParamsSfc(&sfcParams));
1340
1341 // set-up color space
1342 switch (colorSpace)
1343 {
1344 case ECOLORSPACE_P601:
1345 m_sfcState->SetOutputColorSpace(MHW_CSpace_BT601);
1346 break;
1347 case ECOLORSPACE_P709:
1348 m_sfcState->SetOutputColorSpace(MHW_CSpace_BT709);
1349 break;
1350 case ECOLORSPACE_P2020:
1351 m_sfcState->SetOutputColorSpace(MHW_CSpace_BT2020);
1352 break;
1353 default:
1354 CODECHAL_ENCODE_ASSERTMESSAGE("Unknow input color space = %d!", colorSpace);
1355 eStatus = MOS_STATUS_INVALID_PARAMETER;
1356 }
1357
1358 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->SetParams(
1359 &sfcParams));
1360
1361 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_sfcState->RenderStart(
1362 m_encoder));
1363
1364 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesToEncPak());
1365
1366 return eStatus;
1367 }
1368
CscKernel(KernelParams * params)1369 MOS_STATUS CodechalEncodeCscDs::CscKernel(
1370 KernelParams* params)
1371 {
1372 CODECHAL_ENCODE_FUNCTION_ENTER;
1373
1374 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1375
1376 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1377
1378 if (!m_cscKernelState)
1379 {
1380 CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscKernelState = MOS_New(MHW_KERNEL_STATE));
1381
1382 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateCsc());
1383 }
1384
1385 // allocate CSC surface (existing surfaces will be re-used when associated frame retires from RefList)
1386 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurfaceCsc());
1387
1388 if (m_scalingEnabled)
1389 {
1390 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurfaceDS());
1391 if (m_standard == CODECHAL_VP9)
1392 {
1393 auto seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)(m_encoder->m_encodeParams.pSeqParams);
1394 CODECHAL_ENCODE_CHK_NULL_RETURN(seqParams);
1395 if (seqParams->SeqFlags.fields.EnableDynamicScaling) {
1396 m_encoder->m_trackedBuf->ResizeSurfaceDS();
1397 }
1398 }
1399 }
1400
1401 if (m_2xScalingEnabled)
1402 {
1403 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurface2xDS());
1404 }
1405
1406 if (m_encoder->m_trackedBuf->GetWaitCsc())
1407 {
1408 // on-demand sync for CSC surface re-use
1409 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitCscSurface(m_renderContext, false));
1410 }
1411
1412 // setup kernel params
1413 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParamsCsc(params));
1414
1415 if(m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard)
1416 {
1417 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1418 m_surfaceParamsCsc.psInputSurface,
1419 CodechalDbgAttr::attrEncodeRawInputSurface,
1420 m_curbeParams.downscaleStage == dsStage4x ? "4xDS_Input" : (m_curbeParams.downscaleStage == dsStage16x ? "16xDS_Input" : "32xDS_Input"))));
1421 }
1422
1423 PerfTagSetting perfTag;
1424 perfTag.Value = 0;
1425 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1426 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_DS_CONVERSION_KERNEL;
1427 perfTag.PictureCodingType = m_encoder->m_pictureCodingType;
1428 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1429 // Each scaling kernel buffer counts as a separate perf task
1430 m_osInterface->pfnResetPerfBufferID(m_osInterface);
1431
1432 // if Single Task Phase is not enabled, use BT count for the kernel state.
1433 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
1434 {
1435 auto maxBtCount = m_singleTaskPhaseSupported ?
1436 m_maxBtCount : m_cscKernelState->KernelParams.iBTCount;
1437 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->RequestSshSpaceForCmdBuf(maxBtCount));
1438 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1439 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->VerifySpaceAvailable());
1440 }
1441
1442 // setup CscDsCopy DSH and Interface Descriptor
1443 auto stateHeapInterface = m_renderInterface->m_stateHeapInterface;
1444 CODECHAL_ENCODE_CHK_NULL_RETURN(stateHeapInterface);
1445 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1446 stateHeapInterface,
1447 m_cscKernelState,
1448 false,
1449 0,
1450 false,
1451 m_storeData));
1452
1453 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1454 MOS_ZeroMemory(&idParams, sizeof(idParams));
1455 idParams.pKernelState = m_cscKernelState;
1456 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetInterfaceDescriptor(1, &idParams));
1457
1458 // send CURBE
1459 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeCsc());
1460
1461 if(m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard)
1462 {
1463 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1464 m_curbeParams.downscaleStage == dsStage4x ? CODECHAL_MEDIA_STATE_4X_SCALING :
1465 (m_curbeParams.downscaleStage == dsStage16x ? CODECHAL_MEDIA_STATE_16X_SCALING : CODECHAL_MEDIA_STATE_32X_SCALING),
1466 m_cscKernelState)));
1467 }
1468
1469 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_CSC_DS_COPY;
1470 CODECHAL_DEBUG_TOOL(
1471 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1472 encFunctionType,
1473 m_cscKernelState));
1474
1475 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1476 encFunctionType,
1477 MHW_DSH_TYPE,
1478 m_cscKernelState));
1479 )
1480
1481 MOS_COMMAND_BUFFER cmdBuffer;
1482 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1483
1484 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
1485 sendKernelCmdsParams.EncFunctionType = encFunctionType;
1486 sendKernelCmdsParams.pKernelState = m_cscKernelState;
1487 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1488
1489 // add binding table
1490 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetBindingTable(m_cscKernelState));
1491
1492 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSurfaceCsc(&cmdBuffer));
1493
1494 CODECHAL_DEBUG_TOOL(
1495 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1496 encFunctionType,
1497 MHW_SSH_TYPE,
1498 m_cscKernelState));
1499 )
1500
1501 // If m_pollingSyncEnabled is set, insert HW semaphore to wait for external
1502 // raw surface processing to complete, before start CSC. Once the marker in
1503 // raw surface is overwritten by external operation, HW semaphore will be
1504 // signalled and CSC will start. This is to reduce SW latency between
1505 // external raw surface processing and CSC, in usages like remote gaming.
1506 if (m_pollingSyncEnabled)
1507 {
1508 MHW_MI_SEMAPHORE_WAIT_PARAMS miSemaphoreWaitParams;
1509 MOS_ZeroMemory((&miSemaphoreWaitParams), sizeof(miSemaphoreWaitParams));
1510 miSemaphoreWaitParams.presSemaphoreMem = &m_surfaceParamsCsc.psInputSurface->OsResource;
1511 miSemaphoreWaitParams.dwResourceOffset = m_syncMarkerOffset;
1512 miSemaphoreWaitParams.bPollingWaitMode = true;
1513 miSemaphoreWaitParams.dwSemaphoreData = m_syncMarkerValue;
1514 miSemaphoreWaitParams.CompareOperation = MHW_MI_SAD_NOT_EQUAL_SDD;
1515 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(&cmdBuffer, &miSemaphoreWaitParams));
1516 }
1517
1518 HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__));
1519 HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderInterface->GetMmioRegisters());
1520 if (!m_encoder->m_computeContextEnabled)
1521 {
1522 MHW_WALKER_PARAMS walkerParams;
1523 MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
1524 walkerParams.WalkerMode = m_walkerMode;
1525 walkerParams.UseScoreboard = m_useHwScoreboard;
1526 walkerParams.BlockResolution.x =
1527 walkerParams.GlobalResolution.x =
1528 walkerParams.GlobalOutlerLoopStride.x = m_walkerResolutionX;
1529 walkerParams.BlockResolution.y =
1530 walkerParams.GlobalResolution.y =
1531 walkerParams.GlobalInnerLoopUnit.y = m_walkerResolutionY;
1532
1533 //MAX VALUE
1534 walkerParams.dwLocalLoopExecCount = 0xFFFF;
1535 walkerParams.dwGlobalLoopExecCount = 0xFFFF;
1536
1537 // Raster scan walking pattern
1538 walkerParams.LocalOutLoopStride.y = 1;
1539 walkerParams.LocalInnerLoopUnit.x = 1;
1540 walkerParams.LocalEnd.x = m_walkerResolutionX - 1;
1541
1542 if (m_groupIdSelectSupported)
1543 {
1544 walkerParams.GroupIdLoopSelect = m_groupId;
1545 }
1546
1547 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(&cmdBuffer, &walkerParams));
1548 }
1549 else
1550 {
1551 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetWalkerCmd(&cmdBuffer, m_cscKernelState));
1552 }
1553
1554 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->EndStatusReport(&cmdBuffer, encFunctionType));
1555
1556 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SubmitBlocks(m_cscKernelState));
1557
1558 // If m_pollingSyncEnabled is set, write the marker to source surface for next MI_SEMAPHORE_WAIT to check.
1559 if (m_pollingSyncEnabled)
1560 {
1561 MHW_MI_STORE_DATA_PARAMS storeDataParams;
1562 storeDataParams.pOsResource = &m_surfaceParamsCsc.psInputSurface->OsResource;
1563 storeDataParams.dwResourceOffset = m_syncMarkerOffset;
1564 storeDataParams.dwValue = m_syncMarkerValue;
1565 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
1566 }
1567
1568 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1569 {
1570 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->UpdateGlobalCmdBufId());
1571 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1572 }
1573
1574 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1575 &cmdBuffer,
1576 encFunctionType,
1577 nullptr)));
1578
1579 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(
1580 &cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
1581
1582 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1583
1584 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1585 {
1586 HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
1587 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1588 m_lastTaskInPhase = false;
1589 }
1590
1591 if (dsDisabled == params->stageDsConversion && !(m_encoder->m_vdencEnabled && CODECHAL_HEVC == m_standard))
1592 {
1593 // send appropriate surface to Enc/Pak depending on different CSC operation type
1594 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesToEncPak());
1595 }
1596
1597 return eStatus;
1598 }
1599
DsKernel(KernelParams * params)1600 MOS_STATUS CodechalEncodeCscDs::DsKernel(
1601 KernelParams* params)
1602 {
1603 CODECHAL_ENCODE_FUNCTION_ENTER;
1604
1605 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1606
1607 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1608
1609 if (!m_firstField)
1610 {
1611 // Both fields are scaled when the first field comes in, no need to scale again
1612 return eStatus;
1613 }
1614
1615 if (!m_dsKernelState)
1616 {
1617 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDS());
1618 }
1619
1620 if (m_scalingEnabled)
1621 {
1622 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurfaceDS());
1623 if (m_standard == CODECHAL_VP9)
1624 {
1625 auto seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)(m_encoder->m_encodeParams.pSeqParams);
1626 CODECHAL_ENCODE_CHK_NULL_RETURN(seqParams);
1627 if (seqParams->SeqFlags.fields.EnableDynamicScaling) {
1628 m_encoder->m_trackedBuf->ResizeSurfaceDS();
1629 }
1630 }
1631 }
1632
1633 if (m_2xScalingEnabled)
1634 {
1635 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->m_trackedBuf->AllocateSurface2xDS());
1636 }
1637
1638 PerfTagSetting perfTag;
1639 perfTag.Value = 0;
1640 perfTag.Mode = m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1641 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL;
1642 perfTag.PictureCodingType = m_encoder->m_pictureCodingType;
1643 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1644 m_osInterface->pfnIncPerfBufferID(m_osInterface);
1645 // Each scaling kernel buffer counts as a separate perf task
1646 m_osInterface->pfnResetPerfBufferID(m_osInterface);
1647
1648 bool fieldPicture = CodecHal_PictureIsField(m_encoder->m_currOriginalPic);
1649 m_dsKernelState = params->b32xScalingInUse ?
1650 &m_encoder->m_scaling2xKernelStates[fieldPicture] :
1651 &m_encoder->m_scaling4xKernelStates[fieldPicture];
1652
1653 // If Single Task Phase is not enabled, use BT count for the kernel state.
1654 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
1655 {
1656 auto maxBtCount = m_singleTaskPhaseSupported ?
1657 m_maxBtCount : m_dsKernelState->KernelParams.iBTCount;
1658 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->RequestSshSpaceForCmdBuf(maxBtCount));
1659 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1660 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->VerifySpaceAvailable());
1661 }
1662
1663 //Setup Scaling DSH
1664 auto stateHeapInterface = m_renderInterface->m_stateHeapInterface;
1665 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1666 stateHeapInterface,
1667 m_dsKernelState,
1668 false,
1669 0,
1670 false,
1671 m_storeData));
1672
1673 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1674 MOS_ZeroMemory(&idParams, sizeof(idParams));
1675 idParams.pKernelState = m_dsKernelState;
1676 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetInterfaceDescriptor(1, &idParams));
1677
1678 uint32_t downscaledWidthInMb, downscaledHeightInMb;
1679 uint32_t inputFrameWidth, inputFrameHeight;
1680
1681 if (params->b32xScalingInUse)
1682 {
1683 downscaledWidthInMb = m_downscaledWidth32x / CODECHAL_MACROBLOCK_WIDTH;
1684 downscaledHeightInMb = m_downscaledHeight32x / CODECHAL_MACROBLOCK_HEIGHT;
1685 if (fieldPicture)
1686 {
1687 downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
1688 }
1689
1690 inputFrameWidth = m_downscaledWidth16x;
1691 inputFrameHeight = m_downscaledHeight16x;
1692
1693 m_lastTaskInPhase = params->bLastTaskInPhase32xDS;
1694 m_currRefList->b32xScalingUsed = true;
1695 }
1696 else if (params->b16xScalingInUse)
1697 {
1698 downscaledWidthInMb = m_downscaledWidth16x / CODECHAL_MACROBLOCK_WIDTH;
1699 downscaledHeightInMb = m_downscaledHeight16x / CODECHAL_MACROBLOCK_HEIGHT;
1700 if (fieldPicture)
1701 {
1702 downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
1703 }
1704
1705 inputFrameWidth = m_downscaledWidth4x;
1706 inputFrameHeight = m_downscaledHeight4x;
1707
1708 m_lastTaskInPhase = params->bLastTaskInPhase16xDS;
1709 m_currRefList->b16xScalingUsed = true;
1710 }
1711 else
1712 {
1713 downscaledWidthInMb = m_downscaledWidth4x / CODECHAL_MACROBLOCK_WIDTH;
1714 downscaledHeightInMb = m_downscaledHeight4x / CODECHAL_MACROBLOCK_HEIGHT;
1715 if (fieldPicture)
1716 {
1717 downscaledHeightInMb = (downscaledHeightInMb + 1) >> 1 << 1;
1718 }
1719
1720 inputFrameWidth = m_encoder->m_oriFrameWidth;
1721 inputFrameHeight = m_encoder->m_oriFrameHeight;
1722
1723 m_lastTaskInPhase = params->bLastTaskInPhase4xDS;
1724 m_currRefList->b4xScalingUsed = true;
1725 }
1726
1727 CODEC_PICTURE originalPic = (params->bRawInputProvided) ? params->inputPicture : m_encoder->m_currOriginalPic;
1728 FeiPreEncParams *preEncParams = nullptr;
1729 if (m_encoder->m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC)
1730 {
1731 preEncParams = (FeiPreEncParams*)m_encoder->m_encodeParams.pPreEncParams;
1732 CODECHAL_ENCODE_CHK_NULL_RETURN(preEncParams);
1733 }
1734
1735 bool scaling4xInUse = !(params->b32xScalingInUse || params->b16xScalingInUse);
1736 m_curbeParams.pKernelState = m_dsKernelState;
1737 m_curbeParams.dwInputPictureWidth = inputFrameWidth;
1738 m_curbeParams.dwInputPictureHeight = inputFrameHeight;
1739 m_curbeParams.b16xScalingInUse = params->b16xScalingInUse;
1740 m_curbeParams.b32xScalingInUse = params->b32xScalingInUse;
1741 m_curbeParams.bFieldPicture = fieldPicture;
1742 // Enable flatness check only for 4x scaling.
1743 m_curbeParams.bFlatnessCheckEnabled = scaling4xInUse && m_flatnessCheckEnabled;
1744 m_curbeParams.bMBVarianceOutputEnabled = m_curbeParams.bMBPixelAverageOutputEnabled =
1745 preEncParams ? !preEncParams->bDisableStatisticsOutput : scaling4xInUse && m_mbStatsEnabled;
1746 m_curbeParams.bBlock8x8StatisticsEnabled = preEncParams ? preEncParams->bEnable8x8Statistics : false;
1747
1748 if (params->b32xScalingInUse)
1749 {
1750 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeDS2x());
1751 }
1752 else
1753 {
1754 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeDS4x());
1755 }
1756
1757 auto encFunctionType = params->b32xScalingInUse ? CODECHAL_MEDIA_STATE_32X_SCALING :
1758 (params->b16xScalingInUse ? CODECHAL_MEDIA_STATE_16X_SCALING : CODECHAL_MEDIA_STATE_4X_SCALING);
1759 CODECHAL_DEBUG_TOOL(
1760 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1761 encFunctionType,
1762 MHW_DSH_TYPE,
1763 m_dsKernelState));
1764 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1765 encFunctionType,
1766 m_dsKernelState));
1767 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1768 encFunctionType,
1769 MHW_ISH_TYPE,
1770 m_dsKernelState));
1771 )
1772
1773 MOS_COMMAND_BUFFER cmdBuffer;
1774 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1775
1776 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
1777 sendKernelCmdsParams.EncFunctionType = encFunctionType;
1778 sendKernelCmdsParams.pKernelState = m_dsKernelState;
1779 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1780
1781 // Add binding table
1782 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetBindingTable(m_dsKernelState));
1783 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfaceParamsDS(params));
1784 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSurfaceDS(&cmdBuffer));
1785
1786 // Add dump for scaling surface state heap here
1787 CODECHAL_DEBUG_TOOL(
1788 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1789 encFunctionType,
1790 MHW_SSH_TYPE,
1791 m_dsKernelState));
1792 )
1793
1794 uint32_t resolutionX, resolutionY;
1795 if (params->b32xScalingInUse)
1796 {
1797 resolutionX = downscaledWidthInMb;
1798 resolutionY = downscaledHeightInMb;
1799 }
1800 else
1801 {
1802 resolutionX = downscaledWidthInMb * 2; /* looping for Walker is needed at 8x8 block level */
1803 resolutionY = downscaledHeightInMb * 2;
1804 if (fieldPicture && (m_encoder->m_codecFunction == CODECHAL_FUNCTION_FEI_PRE_ENC))
1805 {
1806 resolutionY = MOS_ALIGN_CEIL(downscaledHeightInMb, 2) * 2;
1807 }
1808 }
1809
1810 MHW_WALKER_PARAMS walkerParams;
1811 MOS_ZeroMemory(&walkerParams, sizeof(MHW_WALKER_PARAMS));
1812 walkerParams.WalkerMode = m_walkerMode;
1813 walkerParams.BlockResolution.x =
1814 walkerParams.GlobalResolution.x =
1815 walkerParams.GlobalOutlerLoopStride.x = resolutionX;
1816 walkerParams.BlockResolution.y =
1817 walkerParams.GlobalResolution.y =
1818 walkerParams.GlobalInnerLoopUnit.y = resolutionY;
1819 walkerParams.dwLocalLoopExecCount = 0xFFFF; //MAX VALUE
1820 walkerParams.dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE
1821
1822 // Raster scan walking pattern
1823 walkerParams.LocalOutLoopStride.y = 1;
1824 walkerParams.LocalInnerLoopUnit.x = 1;
1825 walkerParams.LocalEnd.x = resolutionX - 1;
1826
1827 if (m_groupIdSelectSupported)
1828 {
1829 walkerParams.GroupIdLoopSelect = m_groupId;
1830 }
1831
1832 HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__));
1833 HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderInterface->GetMmioRegisters());
1834
1835 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(&cmdBuffer, &walkerParams));
1836
1837 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->EndStatusReport(&cmdBuffer, encFunctionType));
1838
1839 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SubmitBlocks(m_dsKernelState));
1840
1841 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1842 {
1843 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->UpdateGlobalCmdBufId());
1844 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1845 }
1846
1847 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1848 &cmdBuffer,
1849 encFunctionType,
1850 nullptr)));
1851
1852 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(
1853 &cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
1854
1855 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1856
1857 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1858 {
1859 HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
1860 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1861 m_lastTaskInPhase = false;
1862 }
1863
1864 return eStatus;
1865 }
1866
RawSurfaceMediaCopy(MOS_FORMAT srcFormat)1867 MOS_STATUS CodechalEncodeCscDs::RawSurfaceMediaCopy(MOS_FORMAT srcFormat)
1868 {
1869 CODECHAL_ENCODE_FUNCTION_ENTER;
1870
1871 // Call m_hwInterface->CreateMediaCopy directly for legacy code
1872 if (nullptr == m_mediaCopyBaseState)
1873 {
1874 m_mediaCopyBaseState = m_hwInterface->CreateMediaCopy(m_osInterface);
1875 }
1876 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mediaCopyBaseState);
1877
1878 // Call raw surface Copy function
1879 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurfaceCopy(srcFormat));
1880
1881 auto cscSurface = m_encoder->m_trackedBuf->GetCscSurface(CODEC_CURR_TRACKED_BUFFER);
1882
1883 // Copy through VEBOX from Linear/TileY to TileY
1884 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mediaCopyBaseState->SurfaceCopy(
1885 &m_rawSurfaceToEnc->OsResource,
1886 &cscSurface->OsResource,
1887 MCPY_METHOD_BALANCE));
1888
1889 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesToEncPak());
1890
1891 return MOS_STATUS_SUCCESS;
1892 }
1893
SetHevcCscFlagAndRawColor()1894 MOS_STATUS CodechalEncodeCscDs::SetHevcCscFlagAndRawColor()
1895 {
1896 CODECHAL_ENCODE_FUNCTION_ENTER;
1897
1898 if(m_rawSurfaceToEnc->Format != Format_NV12 && CheckRawColorFormat(m_rawSurfaceToEnc->Format, m_rawSurfaceToEnc->TileType) == MOS_STATUS_SUCCESS)
1899 {
1900 m_cscFlag = true;
1901 }
1902
1903 return MOS_STATUS_SUCCESS;
1904 }
1905
CodechalEncodeCscDs(CodechalEncoderState * encoder)1906 CodechalEncodeCscDs::CodechalEncodeCscDs(CodechalEncoderState *encoder)
1907 : m_useRawForRef(encoder->m_useRawForRef),
1908 m_useCommonKernel(encoder->m_useCommonKernel),
1909 m_useHwScoreboard(encoder->m_useHwScoreboard),
1910 m_renderContextUsesNullHw(encoder->m_renderContextUsesNullHw),
1911 m_groupIdSelectSupported(encoder->m_groupIdSelectSupported),
1912 m_16xMeSupported(encoder->m_16xMeSupported),
1913 m_32xMeSupported(encoder->m_32xMeSupported),
1914 m_scalingEnabled(encoder->m_scalingEnabled),
1915 m_2xScalingEnabled(encoder->m_2xScalingEnabled),
1916 m_firstField(encoder->m_firstField),
1917 m_fieldScalingOutputInterleaved(encoder->m_fieldScalingOutputInterleaved),
1918 m_flatnessCheckEnabled(encoder->m_flatnessCheckEnabled),
1919 m_mbStatsEnabled(encoder->m_mbStatsEnabled),
1920 m_mbStatsSupported(encoder->m_mbStatsSupported),
1921 m_singleTaskPhaseSupported(encoder->m_singleTaskPhaseSupported),
1922 m_firstTaskInPhase(encoder->m_firstTaskInPhase),
1923 m_lastTaskInPhase(encoder->m_lastTaskInPhase),
1924 m_pollingSyncEnabled(encoder->m_pollingSyncEnabled),
1925 m_groupId(encoder->m_groupId),
1926 m_outputChromaFormat(encoder->m_outputChromaFormat),
1927 m_standard(encoder->m_standard),
1928 m_mode(encoder->m_mode),
1929 m_downscaledWidth4x(encoder->m_downscaledWidth4x),
1930 m_downscaledHeight4x(encoder->m_downscaledHeight4x),
1931 m_downscaledWidth16x(encoder->m_downscaledWidth16x),
1932 m_downscaledHeight16x(encoder->m_downscaledHeight16x),
1933 m_downscaledWidth32x(encoder->m_downscaledWidth32x),
1934 m_downscaledHeight32x(encoder->m_downscaledHeight32x),
1935 m_scaledBottomFieldOffset(encoder->m_scaledBottomFieldOffset),
1936 m_scaled16xBottomFieldOffset(encoder->m_scaled16xBottomFieldOffset),
1937 m_scaled32xBottomFieldOffset(encoder->m_scaled32xBottomFieldOffset),
1938 m_mbVProcStatsBottomFieldOffset(encoder->m_mbvProcStatsBottomFieldOffset),
1939 m_mbStatsBottomFieldOffset(encoder->m_mbStatsBottomFieldOffset),
1940 m_flatnessCheckBottomFieldOffset(encoder->m_flatnessCheckBottomFieldOffset),
1941 m_verticalLineStride(encoder->m_verticalLineStride),
1942 m_maxBtCount(encoder->m_maxBtCount),
1943 m_vmeStatesSize(encoder->m_vmeStatesSize),
1944 m_storeData(encoder->m_storeData),
1945 m_syncMarkerOffset(encoder->m_syncMarkerOffset),
1946 m_syncMarkerValue(encoder->m_syncMarkerValue),
1947 m_renderContext(encoder->m_renderContext),
1948 m_walkerMode(encoder->m_walkerMode),
1949 m_currRefList(encoder->m_currRefList),
1950 m_resMbStatsBuffer(encoder->m_resMbStatsBuffer),
1951 m_rawSurfaceToEnc(encoder->m_rawSurfaceToEnc),
1952 m_rawSurfaceToPak(encoder->m_rawSurfaceToPak)
1953 {
1954 // Initilize interface pointers
1955 m_encoder = encoder;
1956 m_osInterface = encoder->GetOsInterface();
1957 m_hwInterface = encoder->GetHwInterface();
1958 m_debugInterface = encoder->GetDebugInterface();
1959 m_miInterface = m_hwInterface->GetMiInterface();
1960 m_renderInterface = m_hwInterface->GetRenderInterface();
1961 m_stateHeapInterface = m_renderInterface->m_stateHeapInterface->pStateHeapInterface;
1962
1963 m_cscFlag = m_cscDsConvEnable = 0;
1964
1965 m_dsBTCount[0] = ds4xNumSurfaces;
1966 m_dsBTCount[1] = ds2xNumSurfaces;
1967 m_dsCurbeLength[0] = sizeof(Ds4xKernelCurbeData);
1968 m_dsCurbeLength[1] = sizeof(Ds2xKernelCurbeData);
1969 m_dsInlineDataLength = sizeof(DsKernelInlineData);
1970 }
1971
~CodechalEncodeCscDs()1972 CodechalEncodeCscDs::~CodechalEncodeCscDs()
1973 {
1974 MOS_Delete(m_cscKernelState);
1975 MOS_Delete(m_sfcState);
1976 MOS_Delete(m_mediaCopyBaseState);
1977 }
1978