1 /*
2 * Copyright (c) 2017-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 //!
24 //! \file codechal_vdenc_vp9_g12.cpp
25 //! \brief VP9 VDENC encoder for GEN12.
26 //!
27
28 #include "codechal_vdenc_vp9_g12.h"
29 #include "codechal_kernel_header_g12.h"
30 #include "codechal_kernel_hme_g12.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
33 #include "igcodeckrn_g12.h"
34 #endif
35 #include "mhw_vdbox_hcp_g12_X.h"
36 #include "mhw_vdbox_vdenc_g12_X.h"
37 #include "mhw_vdbox_g12_X.h"
38 #include "mhw_vdbox_vdenc_hwcmd_g12_X.h"
39 #include "mhw_mi_g12_X.h"
40 #include "mhw_render_g12_X.h"
41 #include "codechal_mmc_encode_vp9_g12.h"
42 #include "codechal_hw_g12_X.h"
43
// Numeric limit of 512. NOTE(review): presumably a maximum path-string length;
// it is not referenced in the visible part of this file - confirm it is still needed.
#define MAXPATH 512
45
//!
//! \brief  Static table of 48 DWORDs.
//!         Entries 1, 2, 3 and 5 hold non-zero constants, every other entry in
//!         [0, 39] is zero, and entries [40, 47] are all 0xffffffff.
//!         NOTE(review): presumably the initial CURBE payload for the ME kernel -
//!         confirm against the code that copies this table.
//!
const uint32_t CodechalVdencVp9StateG12::meCurbeInit[48] =
{
    0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
55
UserFeatureKeyReport()56 MOS_STATUS CodechalVdencVp9StateG12::UserFeatureKeyReport()
57 {
58 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
59
60 CODECHAL_ENCODE_FUNCTION_ENTER;
61
62 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::UserFeatureKeyReport());
63
64 #if (_DEBUG || _RELEASE_INTERNAL)
65 CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
66 CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH, m_enableTileStitchByHW, m_osInterface->pOsContext);
67 CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_SINGLE_PASS_DYS_ENABLE_ID, m_singlePassDys, m_osInterface->pOsContext);
68 #endif
69
70 return eStatus;
71 }
72
CodechalVdencVp9StateG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)73 CodechalVdencVp9StateG12::CodechalVdencVp9StateG12(
74 CodechalHwInterface* hwInterface,
75 CodechalDebugInterface* debugInterface,
76 PCODECHAL_STANDARD_INFO standardInfo)
77 :CodechalVdencVp9State(hwInterface, debugInterface, standardInfo)
78 {
79 m_useCommonKernel = true;
80 m_isTilingSupported = true;
81
82 // We need the DYS kernel inside AllVP9Enc_CNLA0, for SHME we need kernels inside
83 // HME_DS_SCOREBOARD_KERNEL, so we need to allocate enough size in ISH for both.
84
85 uint8_t* binary = nullptr;
86 m_scalabilityState = nullptr;
87 uint32_t combinedKernelSize = 0;
88
89 pfnGetKernelHeaderAndSize = GetCommonKernelHeaderAndSizeG12;
90
91 m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_VP9_NUM_SYNC_TAGS;
92 m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_ENCODE_VP9_INIT_DSH_SIZE;
93
94 m_kuid = IDR_CODEC_AllVP9Enc;
95
96 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
97
98 if (m_useCommonKernel)
99 {
100 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
101 eStatus = CodecHalGetKernelBinaryAndSize(
102 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
103 (uint8_t*)IGCODECKRN_G12,
104 #else
105 nullptr,
106 #endif
107 m_kuidCommon,
108 &binary,
109 &combinedKernelSize);
110 CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
111
112 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
113 MOS_ALIGN_CEIL(combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
114 }
115
116 // Initialize to 0
117 MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
118 MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
119 MOS_ZeroMemory(&m_hcpScalabilitySyncBuffer, sizeof(m_hcpScalabilitySyncBuffer));
120
121 for (auto i = 0; i < m_numUncompressedSurface; i++)
122 {
123 MOS_ZeroMemory(&m_tileRecordBuffer[i].sResource, sizeof(m_tileRecordBuffer[i].sResource));
124 }
125 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
126 m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
127 Mos_SetVirtualEngineSupported(m_osInterface, true);
128 for (auto i = 0; i < m_numUncompressedSurface; i++)
129 {
130 MOS_ZeroMemory(&m_tileStatsPakIntegrationBuffer[i].sResource, sizeof(m_tileStatsPakIntegrationBuffer[i].sResource));
131 }
132 MOS_ZeroMemory(&m_frameStatsPakIntegrationBuffer.sResource, sizeof(m_frameStatsPakIntegrationBuffer.sResource));
133 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
134 {
135 for (auto j = 0; j < m_brcMaxNumPasses; j++)
136 {
137 MOS_ZeroMemory(&m_hucPakIntDmemBuffer[i][j], sizeof(m_hucPakIntDmemBuffer[i][j]));
138 }
139 }
140 MOS_ZeroMemory(&m_hucPakIntDummyBuffer, sizeof(m_hucPakIntDummyBuffer));
141 MOS_ZeroMemory(&m_hucPakIntBrcDataBuffer, sizeof(m_hucPakIntBrcDataBuffer));
142 for (auto i = 0; i < m_maxNumPipes; i++)
143 {
144 MOS_ZeroMemory(&m_stitchWaitSemaphoreMem[i], sizeof(m_stitchWaitSemaphoreMem[i]));
145 MOS_ZeroMemory(&m_hucDoneSemaphoreMem[i], sizeof(m_hucDoneSemaphoreMem[i]));
146 }
147 MOS_ZeroMemory(&m_pakIntDoneSemaphoreMem, sizeof(m_pakIntDoneSemaphoreMem));
148 }
149
~CodechalVdencVp9StateG12()150 CodechalVdencVp9StateG12::~CodechalVdencVp9StateG12()
151 {
152 CODECHAL_ENCODE_FUNCTION_ENTER;
153
154 if (m_scalabilityState)
155 {
156 MOS_FreeMemAndSetNull(m_scalabilityState);
157 }
158 //Note: virtual engine interface destroy is done in MOS layer
159 return;
160 }
161
162 // This is used only for DynamicScaling
ExecuteDysPictureLevel()163 MOS_STATUS CodechalVdencVp9StateG12::ExecuteDysPictureLevel()
164 {
165 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
166
167 CODECHAL_ENCODE_FUNCTION_ENTER;
168
169 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
170 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
171 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
172 PerfTagSetting perfTag;
173 perfTag.Value = 0;
174 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
175 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
176 perfTag.PictureCodingType = m_pictureCodingType;
177 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
178
179 // We only need to update Huc PAK insert object and picture state for the first pass
180 if (IsFirstPass())
181 {
182 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
183 CODECHAL_ENCODE_CHK_STATUS_RETURN(PakConstructPicStateBatchBuf(
184 &m_brcBuffers.resPicStateBrcWriteHucReadBuffer));
185
186 }
187
188 MOS_COMMAND_BUFFER cmdBuffer;
189 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
190
191 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
192 {
193 bool requestFrameTracking = false;
194 MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
195 MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
196 forceWakeupParams.bMFXPowerWellControl = true;
197 forceWakeupParams.bMFXPowerWellControlMask = true;
198 forceWakeupParams.bHEVCPowerWellControl = true;
199 forceWakeupParams.bHEVCPowerWellControlMask = true;
200 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(&cmdBuffer, &forceWakeupParams));
201 // Send command buffer header at the beginning (OS dependent)
202 // frame tracking tag is only added in the last command buffer header
203 requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
204 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
205 }
206
207 // Making sure ImgStatusCtrl is zeroed out before first PAK pass. HW supposedly does this before start of each frame. Remove this after confirming.
208 if (m_currPass == 0)
209 {
210 MHW_MI_LOAD_REGISTER_IMM_PARAMS miLoadRegImmParams;
211 MOS_ZeroMemory(&miLoadRegImmParams, sizeof(miLoadRegImmParams));
212 miLoadRegImmParams.dwData = 0;
213 miLoadRegImmParams.dwRegister = mmioRegisters->hcpVp9EncImageStatusCtrlRegOffset;
214 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiLoadRegisterImmCmd(&cmdBuffer, &miLoadRegImmParams));
215 }
216
217 // Read Image status before running PAK, to get correct cumulative delta applied for final pass.
218 if (m_currPass != m_numPasses) // Don't read it for Repak
219 {
220 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(&cmdBuffer));
221 }
222
223 //updating the numberofpakpasses in encode staus buffer. should not update for repak.
224 if (m_currPass < m_numPasses)
225 {
226 uint32_t offset =
227 (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
228 m_encodeStatusBuf.dwNumPassesOffset +
229 sizeof(uint32_t) * 2; // encode status doesn't start until 3rd DW
230
231 MHW_MI_STORE_DATA_PARAMS storeDataParams;
232 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
233 storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
234 storeDataParams.dwResourceOffset = offset;
235 storeDataParams.dwValue = m_currPass + 1;
236 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
237 }
238
239 if (!m_currPass && m_osInterface->bTagResourceSync)
240 {
241 // This is a short term WA to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
242 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
243 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
244 // as long as Dec/VP/Enc won't depend on this PAK so soon.
245 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
246 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
247 m_osInterface,
248 globalGpuContextSyncTagBuffer));
249 CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
250
251 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
252 MHW_MI_STORE_DATA_PARAMS params;
253 params.pOsResource = globalGpuContextSyncTagBuffer;
254 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
255 params.dwValue = (value > 0) ? (value - 1) : 0;
256 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
257 }
258
259 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
260
261 //Send VD_CONTROL_STATE Pipe Initialization
262 MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
263 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
264 vdCtrlParam.initialization = true;
265 MhwMiInterfaceG12* miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
266 CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
267
268 // set HCP_PIPE_MODE_SELECT values
269 PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
270 pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
271 CODECHAL_ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
272
273 SetHcpPipeModeSelectParams(*pipeModeSelectParams);
274
275 pipeModeSelectParams->Mode = m_mode;
276 pipeModeSelectParams->bStreamOutEnabled = false;
277 pipeModeSelectParams->bVdencEnabled = false;
278 pipeModeSelectParams->ChromaType = m_vp9SeqParams->SeqFlags.fields.EncodedFormat;
279 pipeModeSelectParams->bDynamicScalingEnabled = m_dysRefFrameFlags && !m_dysVdencMultiPassEnabled;
280
281 eStatus = m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams);
282 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
283 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
284
285 // set HCP_SURFACE_STATE values
286 MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
287 for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
288 {
289 MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
290 surfaceParams[i].Mode = m_mode;
291 surfaceParams[i].ucSurfaceStateId = i;
292 surfaceParams[i].ChromaType = m_outputChromaFormat;
293
294 switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
295 {
296 case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
297 {
298 surfaceParams[i].ucBitDepthChromaMinus8 = 2;
299 surfaceParams[i].ucBitDepthLumaMinus8 = 2;
300 break;
301 }
302 default:
303 {
304 surfaceParams[i].ucBitDepthChromaMinus8 = 0;
305 surfaceParams[i].ucBitDepthLumaMinus8 = 0;
306 break;
307 }
308 }
309 }
310
311 // For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
312 PMOS_SURFACE refSurface[3];
313 for (auto i = 0; i < 3; i++)
314 {
315 refSurface[i] = nullptr;
316 }
317
318 if (m_pictureCodingType != I_TYPE)
319 {
320 uint8_t refPicIndex;
321 if (m_refFrameFlags & 0x01)
322 {
323 refPicIndex = m_vp9PicParams->RefFlags.fields.LastRefIdx;
324
325 CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])));
326 refSurface[0] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
327 }
328
329 if (m_refFrameFlags & 0x02)
330 {
331 refPicIndex = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
332
333 CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])));
334 refSurface[1] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
335 }
336
337 if (m_refFrameFlags & 0x04)
338 {
339 refPicIndex = m_vp9PicParams->RefFlags.fields.AltRefIdx;
340
341 CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])))
342 refSurface[2] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
343 }
344
345 if (!refSurface[0])
346 {
347 refSurface[0] = (refSurface[1]) ? refSurface[1] : refSurface[2];
348 }
349
350 if (!refSurface[1])
351 {
352 refSurface[1] = (refSurface[0]) ? refSurface[0] : refSurface[2];
353 }
354
355 if (!refSurface[2])
356 {
357 refSurface[2] = (refSurface[0]) ? refSurface[0] : refSurface[1];
358 }
359
360 // Program Surface params for Last/Golen/Alt Reference surface
361 surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].psSurface = refSurface[0];
362 surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].psSurface = refSurface[1];
363 surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].psSurface = refSurface[2];
364
365 surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[0] ? refSurface[0]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
366 surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[1] ? refSurface[1]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
367 surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[2] ? refSurface[2]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
368 }
369
370 // recon
371 surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID].psSurface = &m_reconSurface;
372 surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID].dwReconSurfHeight = m_rawSurfaceToPak->dwHeight;
373
374 // raw
375 surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].psSurface = m_rawSurfaceToPak;
376 surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].bDisplayFormatSwizzle = m_vp9SeqParams->SeqFlags.fields.DisplayFormatSwizzle;
377 surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].dwActualWidth = MOS_ALIGN_CEIL(m_oriFrameWidth, CODEC_VP9_MIN_BLOCK_WIDTH);
378 surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].dwActualHeight = MOS_ALIGN_CEIL(m_oriFrameHeight, CODEC_VP9_MIN_BLOCK_WIDTH);
379
380 // Decoded picture
381 #ifdef _MMC_SUPPORTED
382 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
383 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
384 #endif
385 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
386
387 // Source input
388 #ifdef _MMC_SUPPORTED
389 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
390 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
391 #endif
392 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
393
394 if (m_pictureCodingType != I_TYPE)
395 {
396 #ifdef _MMC_SUPPORTED
397 //Get each reference surface state and be recorded by skipMask if current surface state is mmc disabled
398 //In VP9 mode, Bit 8is (here is bit0 in skipMask ) for Previous Reference;
399 //Bit 9is (here is bit1 in skipMask ) for Golden Reference and Bit 10is (here is bit2 in skipMask ) for Alterante Reference;
400 //Bits11-15are unused and should be programmed to 0 (skipped)
401 uint8_t skipMask = 0xf8;
402 for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
403 {
404 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
405 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[i]));
406 if (surfaceParams[i].mmcState == MOS_MEMCOMP_DISABLED)
407 {
408 skipMask |= (1 << (i - 2));
409 }
410 }
411 CODECHAL_ENCODE_NORMALMESSAGE("MMC skip mask is %d\n", skipMask);
412 for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
413 {
414 //Set each ref surface state as MOS_MEMCOMP_MC to satisfy MmcEnable in AddHcpSurfaceCmd
415 //Because each ref surface state should be programmed as the same
416 //The actual mmc state is recorded by skipMask and set each ref surface too
417 surfaceParams[i].mmcState = MOS_MEMCOMP_MC;
418 surfaceParams[i].mmcSkipMask = skipMask;
419 }
420 #endif
421 for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
422 {
423 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[i]));
424 }
425 }
426
427 // set HCP_PIPE_BUF_ADDR_STATE values
428 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
429 pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
430 if (pipeBufAddrParams)
431 {
432 auto delete_func = [&]()
433 {
434 MOS_Delete(pipeBufAddrParams);
435 pipeBufAddrParams = nullptr;
436 };
437
438 pipeBufAddrParams->Mode = m_mode;
439 pipeBufAddrParams->psPreDeblockSurface = &m_reconSurface;
440 pipeBufAddrParams->psPostDeblockSurface = &m_reconSurface;
441 pipeBufAddrParams->psRawSurface = m_rawSurfaceToPak;
442
443 pipeBufAddrParams->presStreamOutBuffer = nullptr;
444 pipeBufAddrParams->presMfdDeblockingFilterRowStoreScratchBuffer =
445 &m_resDeblockingFilterLineBuffer;
446
447 pipeBufAddrParams->presDeblockingFilterTileRowStoreScratchBuffer =
448 &m_resDeblockingFilterTileLineBuffer;
449
450 pipeBufAddrParams->presDeblockingFilterColumnRowStoreScratchBuffer =
451 &m_resDeblockingFilterTileColumnBuffer;
452
453 pipeBufAddrParams->presMetadataLineBuffer = &m_resMetadataLineBuffer;
454 pipeBufAddrParams->presMetadataTileLineBuffer = &m_resMetadataTileLineBuffer;
455 pipeBufAddrParams->presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
456 pipeBufAddrParams->presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
457
458 #ifdef _MMC_SUPPORTED
459 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
460 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
461 #endif
462
463 //Huc is disabled for ref frame scaling, use input region
464 uint8_t frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
465 CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
466 pipeBufAddrParams->presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
467 pipeBufAddrParams->presVp9SegmentIdBuffer = &m_resSegmentIdBuffer;
468
469 if (m_pictureCodingType != I_TYPE)
470 {
471 for (auto i = 0; i < 3; i++)
472 {
473 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(refSurface[i], delete_func);
474
475 pipeBufAddrParams->presReferences[i] = &refSurface[i]->OsResource;
476 }
477
478 pipeBufAddrParams->presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
479 }
480 #ifdef _MMC_SUPPORTED
481 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
482 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
483 #endif
484 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
485
486 MOS_Delete(pipeBufAddrParams);
487 }
488
489 // set HCP_IND_OBJ_BASE_ADDR_STATE values
490 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
491 MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
492 indObjBaseAddrParams.Mode = m_mode;
493 indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
494 indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
495 indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
496 indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
497 indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
498 indObjBaseAddrParams.presProbabilityDeltaBuffer = &m_resProbabilityDeltaBuffer;
499 indObjBaseAddrParams.dwProbabilityDeltaSize = 29 * CODECHAL_CACHELINE_SIZE;
500 indObjBaseAddrParams.presCompressedHeaderBuffer = &m_resCompressedHeaderBuffer;
501 indObjBaseAddrParams.dwCompressedHeaderSize = 32 * CODECHAL_CACHELINE_SIZE;
502 indObjBaseAddrParams.presProbabilityCounterBuffer = &m_resProbabilityCounterBuffer;
503 indObjBaseAddrParams.dwProbabilityCounterSize = 193 * CODECHAL_CACHELINE_SIZE;
504 indObjBaseAddrParams.presTileRecordBuffer = &m_resTileRecordStrmOutBuffer;
505 indObjBaseAddrParams.dwTileRecordSize = m_picSizeInSb * CODECHAL_CACHELINE_SIZE;
506 indObjBaseAddrParams.presCuStatsBuffer = &m_resCuStatsStrmOutBuffer;
507 indObjBaseAddrParams.dwCuStatsSize = MOS_ALIGN_CEIL(m_picSizeInSb * 64 * 8, CODECHAL_CACHELINE_SIZE);
508 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
509
510 // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
511 MHW_BATCH_BUFFER secondLevelBatchBuffer;
512 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
513 secondLevelBatchBuffer.dwOffset = (m_numPasses > 0) ? CODECHAL_ENCODE_VP9_PIC_STATE_BUFFER_SIZE_PER_PASS * (m_currPass % m_numPasses) : 0;
514 secondLevelBatchBuffer.bSecondLevel = true;
515 //As Huc is disabled for Ref frame scaling, use the ReadBuffer
516 secondLevelBatchBuffer.OsResource = m_brcBuffers.resPicStateBrcWriteHucReadBuffer;
517 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
518 &cmdBuffer,
519 &secondLevelBatchBuffer));
520
521 // HCP_VP9_SEGMENT_STATE
522 uint8_t segmentCount = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
523
524 MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
525 MOS_ZeroMemory(&segmentState, sizeof(segmentState));
526 segmentState.Mode = m_mode;
527 segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
528 segmentState.ucQPIndexLumaAC = m_vp9PicParams->LumaACQIndex;
529
530 // For BRC with segmentation, seg state commands for PAK are copied from BRC seg state buffer
531 // For CQP or BRC with no segmentation, PAK still needs seg state commands and driver prepares those commands.
532 segmentState.pbSegStateBufferPtr = nullptr; // Set this to nullptr, for commands to be prepared by driver
533 segmentState.pcucLfQpLookup = &LF_VALUE_QP_LOOKUP[0];
534 for (uint8_t i = 0; i < segmentCount; i++)
535 {
536 segmentState.ucCurrentSegmentId = i;
537 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpVp9SegmentStateCmd(&cmdBuffer, nullptr, &segmentState));
538 }
539
540 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
541
542 return eStatus;
543 }
ExecuteDysSliceLevel()544 MOS_STATUS CodechalVdencVp9StateG12::ExecuteDysSliceLevel()
545 {
546 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
547
548 CODECHAL_ENCODE_FUNCTION_ENTER;
549
550 CODECHAL_ENCODE_CHK_NULL_RETURN(m_nalUnitParams);
551
552 MOS_COMMAND_BUFFER cmdBuffer;
553 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
554
555 MHW_BATCH_BUFFER secondLevelBatchBuffer;
556 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
557 secondLevelBatchBuffer.dwOffset = 0;
558 secondLevelBatchBuffer.bSecondLevel = true;
559 if (!m_hucEnabled)
560 {
561 secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
562 }
563 else
564 {
565 secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
566 }
567 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
568 &cmdBuffer,
569 &secondLevelBatchBuffer));
570
571 // Setup Tile level PAK commands
572 CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
573 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9StateG12::SetTileData());
574 CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[0]));
575
576 //Reset Frame Tracking header for this submission as this is not the last submission
577 bool isFrameTrackingHeaderSet = cmdBuffer.Attributes.bEnableMediaFrameTracking;
578 cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
579
580 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
581 secondLevelBatchBuffer.OsResource = m_resMbCodeSurface;
582 secondLevelBatchBuffer.dwOffset = 0;
583 secondLevelBatchBuffer.bSecondLevel = true;
584 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &secondLevelBatchBuffer));
585
586 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
587 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
588 // MFXPipeDone should not be set for tail insertion
589 vdPipelineFlushParams.Flags.bWaitDoneMFX =
590 (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
591 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
592 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
593 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
594
595 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
596
597 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
598 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
599 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
600
601 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
602
603 if (!m_scalableMode)
604 {
605 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
606 }
607
608 if (m_currPass >= (m_numPasses - 1)) // Last pass and the one before last
609 {
610 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
611 }
612
613 std::string currPassName = "PAK_PASS_DYS" + std::to_string((int)m_currPass);
614 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
615 &cmdBuffer,
616 CODECHAL_NUM_MEDIA_STATES,
617 currPassName.data())));
618
619 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
620
621 if (m_waitForEnc &&
622 !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
623 {
624 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
625 syncParams.GpuContext = m_videoContext;
626 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
627
628 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
629 m_waitForEnc = false;
630 }
631
632 if (m_currPass >= (m_numPasses - 1)) // Last pass and the one before last
633 {
634 bool renderFlags;
635
636 renderFlags = m_videoContextUsesNullHw;
637
638 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
639 }
640
641 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
642 cmdBuffer.Attributes.bEnableMediaFrameTracking = isFrameTrackingHeaderSet;
643 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
644
645 CODECHAL_DEBUG_TOOL(
646 if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
647 //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
648 //m_debugInterface->DumpBuffer(
649 // (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
650 // CodechalDbgAttr::attrOutput,
651 // "SegMap_Out",
652 // CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
653 // 0,
654 // CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
655 } if (m_mmcState) {
656 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
657 });
658
659 return eStatus;
660 }
661
InitKernelStates()662 MOS_STATUS CodechalVdencVp9StateG12::InitKernelStates()
663 {
664 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
665
666 CODECHAL_ENCODE_FUNCTION_ENTER;
667
668 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
669 m_kernelBase = (uint8_t*)IGCODECKRN_G12;
670 #endif
671
672 // KUID for HME + DS + SW SCOREBOARD Kernel
673 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
674
675 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
676 // DYS
677 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDys());
678
679 // SHME
680 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
681 #endif
682
683 return eStatus;
684 }
685
GetMaxBtCount()686 uint32_t CodechalVdencVp9StateG12::GetMaxBtCount()
687 {
688 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
689
690 CODECHAL_ENCODE_FUNCTION_ENTER;
691 uint32_t maxBtCount = 0;
692
693 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
694 if (m_hmeSupported)
695 {
696 uint32_t scalingBtCount = 0;
697 uint32_t numKernelsToLoad = m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
698 uint16_t btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
699 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
700 {
701 scalingBtCount += MOS_ALIGN_CEIL(
702 m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
703 btIdxAlignment);
704 }
705 uint32_t meBtCount = 0;
706 // 4xME + Streamin kernel btcount
707 meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_VDENC].KernelParams.iBTCount, btIdxAlignment);
708
709 //16xME streamin kernel count added to ME count and scaling kernel 16x added to scaling count
710 if (m_16xMeSupported)
711 {
712 meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_P].KernelParams.iBTCount, btIdxAlignment);
713 for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
714 {
715 scalingBtCount += MOS_ALIGN_CEIL(
716 m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
717 btIdxAlignment);
718 }
719 }
720 maxBtCount = scalingBtCount + meBtCount;
721 }
722 #endif
723
724 return maxBtCount;
725 }
726
727 // DYS kernel state init
InitKernelStateDys()728 MOS_STATUS CodechalVdencVp9StateG12::InitKernelStateDys()
729 {
730 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
731
732 CODECHAL_ENCODE_FUNCTION_ENTER;
733
734 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
735 uint32_t combinedKernelSize = 0;
736 uint8_t* binary = nullptr;
737 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
738 (uint8_t*)IGCODECKRN_G12,
739 m_kuidCommon,
740 &binary,
741 &combinedKernelSize));
742
743 uint32_t kernelSize = combinedKernelSize;
744 CODECHAL_KERNEL_HEADER currKrnHeader;
745 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
746 binary,
747 ENC_DYS,
748 0,
749 &currKrnHeader,
750 &kernelSize));
751
752 PMHW_KERNEL_STATE kernelState = &m_dysKernelState;
753 kernelState->KernelParams.iBTCount = MOS_ALIGN_CEIL(m_dysNumSurfaces, m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
754 kernelState->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
755 kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(m_dysStaticDataSize, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
756 kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
757 kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
758 kernelState->KernelParams.iIdCount = 1;
759 kernelState->KernelParams.iSamplerCount = 1;
760 kernelState->KernelParams.iSamplerLength = m_stateHeapInterface->pStateHeapInterface->GetSizeofSamplerStateAvs();
761
762 kernelState->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
763 kernelState->dwSamplerOffset = MOS_ALIGN_CEIL(kernelState->dwCurbeOffset + kernelState->KernelParams.iCurbeLength, MHW_SAMPLER_STATE_AVS_ALIGN_G9);
764 kernelState->KernelParams.pBinary =
765 binary +
766 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
767 kernelState->KernelParams.iSize = kernelSize;
768 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
769 m_stateHeapInterface,
770 kernelState->KernelParams.iBTCount,
771 &kernelState->dwSshSize,
772 &kernelState->dwBindingTableSize));
773
774 m_dysDshSize = kernelState->dwSamplerOffset +
775 MOS_ALIGN_CEIL(kernelState->KernelParams.iSamplerLength * kernelState->KernelParams.iSamplerCount, MHW_SAMPLER_STATE_AVS_ALIGN);
776
777 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelState));
778 #endif
779
780 return eStatus;
781 }
782
SetupSegmentationStreamIn()783 MOS_STATUS CodechalVdencVp9StateG12::SetupSegmentationStreamIn()
784 {
785 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
786
787 CODECHAL_ENCODE_FUNCTION_ENTER;
788
789 if (!m_segmentMapProvided && !m_hmeEnabled) // If we're not going to use the streamin surface leave now
790 {
791 return eStatus;
792 }
793
794 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
795 MOS_LOCK_PARAMS lockFlagsWriteOnly;
796 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
797 lockFlagsWriteOnly.WriteOnly = 1;
798
799 MOS_LOCK_PARAMS lockFlagsReadOnly;
800 MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
801 lockFlagsReadOnly.ReadOnly = 1;
802
803 mhw_vdbox_vdenc_g12_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *
804 streamIn = (mhw_vdbox_vdenc_g12_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *)m_osInterface->pfnLockResource(
805 m_osInterface,
806 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
807 &lockFlagsWriteOnly);
808 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
809
810 // align to cache line size is OK since streamin state is padded to cacheline size - HW uses cacheline size to read, not command size
811 uint32_t blockWidth = MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
812 uint32_t blockHeight = MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
813 uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
814 MOS_ZeroMemory(streamIn, streamInSize);
815
816 // If segment map isn't provided then we unlock surface and exit function here.
817 // Reason why check isn't done before function call is to take advantage of the fact that
818 // we need the surface locked here if seg map is provided and we want it 0'd either way.
819 // This saves us from doing 2 locks on this buffer per frame.
820 if (!m_segmentMapProvided)
821 {
822 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
823 m_osInterface,
824 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
825 return eStatus;
826 }
827
828 char *data = (char *)m_osInterface->pfnLockResource(
829 m_osInterface,
830 &m_mbSegmentMapSurface.OsResource,
831 &lockFlagsReadOnly);
832 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
833
834 // Rasterization is done within a tile and then for each tile within the frame in raster order.
835 if (m_isTilingSupported)
836 {
837 uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
838 uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
839 uint32_t numTiles = numTileColumns * numTileRows;
840 uint32_t currTileStartX64Aligned = 0, dwCurrTileStartY64Aligned = 0; //Set tile Y coordinate 0
841 m_32BlocksRasterized = 0; //Count of rasterized blocks for this frame
842 uint32_t tileX = 0;
843 uint32_t tileY = 0;
844 for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
845 {
846 tileX = tileIdx % numTileColumns; //Current tile column position
847 tileY = tileIdx / numTileColumns; //Current tile row position
848
849 currTileStartX64Aligned = ((tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
850 dwCurrTileStartY64Aligned = ((tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
851
852 uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) *
853 CODEC_VP9_SUPER_BLOCK_WIDTH) -
854 currTileStartX64Aligned;
855
856 uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) *
857 CODEC_VP9_SUPER_BLOCK_HEIGHT) -
858 dwCurrTileStartY64Aligned;
859
860 // last tile col raw width and raw height not necessarily 64 aligned, use this length to duplicate values from segmap for empty padding blocks in last tiles.
861 uint32_t lastTileColWidth = (tileX == (numTileColumns - 1)) ? (m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
862 uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (m_frameHeight - dwCurrTileStartY64Aligned) : tileHeight64Aligned;
863
864 uint32_t tileWidth = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
865 uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
866
867 // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
868 // which was processed from this frame or previous,
869 // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
870 if (!m_mapBuffer ||
871 tileHeight != m_segStreamInHeight ||
872 tileWidth != m_segStreamInWidth ||
873 numTileColumns != m_tileParams[tileIdx].NumOfTileColumnsInFrame ||
874 m_tileParams[tileIdx].NumOfTilesInFrame != numTiles)
875 {
876 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(tileHeight,
877 tileWidth,
878 dwCurrTileStartY64Aligned,
879 currTileStartX64Aligned));
880 }
881 m_tileParams[tileIdx].NumOfTileColumnsInFrame = numTileColumns;
882 m_tileParams[tileIdx].NumOfTilesInFrame = numTiles;
883 }
884 }
885
886 uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
887 if (m_osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
888 {
889 //application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer
890 //driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
891 dwPitch = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
892 }
893 // set seg ID's of streamin states
894 for (uint32_t i = 0; i < blockHeight * blockWidth; ++i)
895 {
896 uint32_t addrOffset = CalculateBufferOffset(
897 m_mapBuffer[i],
898 m_frameWidth,
899 m_vp9PicParams->PicFlags.fields.seg_id_block_size,
900 dwPitch);
901 uint32_t segId = *(data + addrOffset);
902 streamIn[i].DW7.SegidEnable = 1;
903 streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
904
905 // TU functions copied from there.
906 streamIn[i].DW0.Maxtusize = 3;
907 streamIn[i].DW0.Maxcusize = 3;
908
909 // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
910 if ((i % 4) == 3 && m_pictureCodingType == P_TYPE)
911 {
912 if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
913 streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
914 streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
915 {
916 streamIn[i - 3].DW0.Maxcusize = streamIn[i - 2].DW0.Maxcusize = streamIn[i - 1].DW0.Maxcusize = streamIn[i].DW0.Maxcusize = 2;
917 }
918 }
919
920 streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
921
922 switch (m_vp9SeqParams->TargetUsage)
923 {
924 case 1: // Quality mode
925 case 2:
926 case 4: // Normal mode
927 streamIn[i].DW6.Nummergecandidatecu8X8 = 1;
928 streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
929 streamIn[i].DW6.Nummergecandidatecu32X32 = 3;
930 streamIn[i].DW6.Nummergecandidatecu64X64 = 4;
931 break;
932 case 7: // Speed mode
933 streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS_SPEED;
934 streamIn[i].DW6.Nummergecandidatecu8X8 = 0;
935 streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
936 streamIn[i].DW6.Nummergecandidatecu32X32 = 2;
937 streamIn[i].DW6.Nummergecandidatecu64X64 = 2;
938 break;
939 default:
940 MHW_ASSERTMESSAGE("Invalid TU provided!");
941 return MOS_STATUS_INVALID_PARAMETER;
942 }
943 }
944
945 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
946 m_osInterface,
947 &m_mbSegmentMapSurface.OsResource));
948
949 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
950 m_osInterface,
951 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
952
953 return eStatus;
954 }
955
GetSystemPipeNumberCommon()956 MOS_STATUS CodechalVdencVp9StateG12::GetSystemPipeNumberCommon()
957 {
958 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
959 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
960
961 CODECHAL_ENCODE_FUNCTION_ENTER;
962
963 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
964 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
965 statusKey = MOS_UserFeature_ReadValue_ID(
966 NULL,
967 __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
968 &userFeatureData,
969 m_osInterface->pOsContext);
970
971 bool disableScalability = m_hwInterface->IsDisableScalability();
972 if (statusKey == MOS_STATUS_SUCCESS)
973 {
974 disableScalability = userFeatureData.i32Data ? true : false;
975 }
976
977 MEDIA_SYSTEM_INFO *gtSystemInfo = m_gtSystemInfo;
978
979 if (gtSystemInfo && disableScalability == false)
980 {
981 // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
982 m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
983 }
984 else
985 {
986 m_numVdbox = 1;
987 }
988
989 return eStatus;
990 }
991
InitKernelStateMe()992 MOS_STATUS CodechalVdencVp9StateG12::InitKernelStateMe()
993 {
994 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
995
996 CODECHAL_ENCODE_FUNCTION_ENTER;
997
998 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
999 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderEngineInterface->GetHwCaps());
1000
1001 uint32_t combinedKernelSize = 0;
1002 uint8_t* binary = nullptr;
1003 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
1004 m_kernelBase,
1005 m_kuidCommon,
1006 &binary,
1007 &combinedKernelSize));
1008
1009 for (uint32_t krnStateIdx = 0; krnStateIdx < 2; krnStateIdx++)
1010 {
1011 CODECHAL_KERNEL_HEADER currKrnHeader;
1012 PMHW_KERNEL_STATE kernelStatePtr = &m_meKernelStates[krnStateIdx];
1013 uint32_t kernelSize = combinedKernelSize;
1014 EncOperation encOperation = (krnStateIdx > 0 && m_vdencEnabled) ?
1015 (m_useNonLegacyStreamin ? VDENC_STREAMIN_HEVC : VDENC_ME) : ENC_ME;
1016 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
1017 binary,
1018 encOperation,
1019 (encOperation != ENC_ME) ? 0 : krnStateIdx,
1020 &currKrnHeader,
1021 &kernelSize));
1022
1023 kernelStatePtr->KernelParams.iBTCount = CODECHAL_ENCODE_ME_NUM_SURFACES_G12;
1024 kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
1025 kernelStatePtr->KernelParams.iCurbeLength = sizeof(MeCurbe);
1026 kernelStatePtr->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
1027 kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
1028 kernelStatePtr->KernelParams.iIdCount = 1;
1029
1030 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1031 kernelStatePtr->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1032 kernelStatePtr->KernelParams.iSize = kernelSize;
1033 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1034 m_stateHeapInterface,
1035 kernelStatePtr->KernelParams.iBTCount,
1036 &kernelStatePtr->dwSshSize,
1037 &kernelStatePtr->dwBindingTableSize));
1038
1039 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
1040 if (m_noMeKernelForPFrame)
1041 {
1042 m_meKernelStates[1] = m_meKernelStates[0];
1043 break;
1044 }
1045 }
1046
1047 // Until a better way can be found, maintain old binding table structures
1048 MeKernelBindingTable* bindingTable = &m_meBindingTable;
1049 bindingTable->dwMEMVDataSurface = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G12;
1050 bindingTable->dw16xMEMVDataSurface = CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G12;
1051 bindingTable->dw32xMEMVDataSurface = CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G12;
1052 bindingTable->dwMEDist = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G12;
1053 bindingTable->dwMEBRCDist = CODECHAL_ENCODE_ME_BRC_DISTORTION_G12;
1054 bindingTable->dwMECurrForFwdRef = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G12;
1055 bindingTable->dwMEFwdRefPicIdx[0] = CODECHAL_ENCODE_ME_FWD_REF_IDX0_G12;
1056 bindingTable->dwMEFwdRefPicIdx[1] = CODECHAL_ENCODE_ME_FWD_REF_IDX1_G12;
1057 bindingTable->dwMEFwdRefPicIdx[2] = CODECHAL_ENCODE_ME_FWD_REF_IDX2_G12;
1058 bindingTable->dwMEFwdRefPicIdx[3] = CODECHAL_ENCODE_ME_FWD_REF_IDX3_G12;
1059 bindingTable->dwMEFwdRefPicIdx[4] = CODECHAL_ENCODE_ME_FWD_REF_IDX4_G12;
1060 bindingTable->dwMEFwdRefPicIdx[5] = CODECHAL_ENCODE_ME_FWD_REF_IDX5_G12;
1061 bindingTable->dwMEFwdRefPicIdx[6] = CODECHAL_ENCODE_ME_FWD_REF_IDX6_G12;
1062 bindingTable->dwMEFwdRefPicIdx[7] = CODECHAL_ENCODE_ME_FWD_REF_IDX7_G12;
1063 bindingTable->dwMECurrForBwdRef = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G12;
1064 bindingTable->dwMEBwdRefPicIdx[0] = CODECHAL_ENCODE_ME_BWD_REF_IDX0_G12;
1065 bindingTable->dwMEBwdRefPicIdx[1] = CODECHAL_ENCODE_ME_BWD_REF_IDX1_G12;
1066 bindingTable->dwVdencStreamInSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G12;
1067 bindingTable->dwVdencStreamInInputSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G12;
1068 #endif
1069
1070 return eStatus;
1071 }
1072
SetCurbeMe(MeCurbeParams * params)1073 MOS_STATUS CodechalVdencVp9StateG12::SetCurbeMe(
1074 MeCurbeParams* params)
1075 {
1076 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1077
1078 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1079 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
1080
1081 CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
1082
1083 uint32_t scaleFactor = 0;
1084 bool useMvFromPrevStep = false, writeDistortions = false;
1085 uint8_t mvShiftFactor = 0, prevMvReadPosFactor = 0;
1086 bool framePicture = CodecHal_PictureIsFrame(params->CurrOriginalPic);
1087 char qpPrimeY = (params->pic_init_qp_minus26 + 26) + params->slice_qp_delta;
1088
1089 switch (params->hmeLvl)
1090 {
1091 case HME_LEVEL_32x:
1092 useMvFromPrevStep = m_hmeFirstStep;
1093 writeDistortions = false;
1094 scaleFactor = SCALE_FACTOR_32x;
1095 mvShiftFactor = m_mvShiftFactor32x;
1096 break;
1097 case HME_LEVEL_16x:
1098 useMvFromPrevStep = (params->b32xMeEnabled) ? m_hmeFollowingStep : m_hmeFirstStep;
1099 writeDistortions = false;
1100 scaleFactor = SCALE_FACTOR_16x;
1101 mvShiftFactor = m_mvShiftFactor16x;
1102 prevMvReadPosFactor = m_prevMvReadPosition16x;
1103 break;
1104 case HME_LEVEL_4x:
1105 useMvFromPrevStep = (params->b16xMeEnabled) ? m_hmeFollowingStep : m_hmeFirstStep;
1106 writeDistortions = true;
1107 scaleFactor = SCALE_FACTOR_4x;
1108 mvShiftFactor = m_mvShiftFactor4x;
1109 prevMvReadPosFactor = m_prevMvReadPosition4x;
1110 break;
1111 default:
1112 return MOS_STATUS_INVALID_PARAMETER;
1113 }
1114
1115 MeCurbe cmd;
1116 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1117 &cmd,
1118 sizeof(MeCurbe),
1119 meCurbeInit,
1120 sizeof(MeCurbe)));
1121
1122 cmd.DW3.SubPelMode = 3;
1123 if (m_fieldScalingOutputInterleaved)
1124 {
1125 cmd.DW3.SrcAccess =
1126 cmd.DW3.RefAccess = CodecHal_PictureIsField(params->CurrOriginalPic) ? 1 : 0;
1127 cmd.DW7.SrcFieldPolarity = CodecHal_PictureIsBottomField(params->CurrOriginalPic) ? 1 : 0;
1128 }
1129
1130 cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
1131 cmd.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
1132 cmd.DW5.QpPrimeY = qpPrimeY;
1133 cmd.DW6.WriteDistortions = writeDistortions;
1134 cmd.DW6.UseMvFromPrevStep = useMvFromPrevStep;
1135
1136 cmd.DW6.SuperCombineDist = m_superCombineDistGeneric[params->TargetUsage];
1137 cmd.DW6.MaxVmvR = (framePicture) ?
1138 params->MaxMvLen * 4 : (params->MaxMvLen >> 1) * 4;
1139
1140 if (m_pictureCodingType == B_TYPE)
1141 {
1142 // This field is irrelevant since we are not using the bi-direct search.
1143 cmd.DW1.BiWeight = 32;
1144 cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
1145 }
1146
1147 if (m_pictureCodingType == P_TYPE ||
1148 m_pictureCodingType == B_TYPE)
1149 {
1150 if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1151 {
1152 cmd.DW30.ActualMBHeight = m_frameHeight;
1153 cmd.DW30.ActualMBWidth = m_frameWidth;
1154 }
1155 else if (m_vdencEnabled && m_16xMeSupported)
1156 {
1157 cmd.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
1158 cmd.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
1159 }
1160 cmd.DW13.NumRefIdxL0MinusOne =
1161 params->num_ref_idx_l0_active_minus1;
1162 }
1163
1164 cmd.DW13.RefStreaminCost = 5;
1165 // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
1166 cmd.DW13.ROIEnable = 0;
1167
1168 if (!framePicture)
1169 {
1170 if (m_pictureCodingType != I_TYPE)
1171 {
1172 cmd.DW14.List0RefID0FieldParity = params->List0RefID0FieldParity;
1173 cmd.DW14.List0RefID1FieldParity = params->List0RefID1FieldParity;
1174 cmd.DW14.List0RefID2FieldParity = params->List0RefID2FieldParity;
1175 cmd.DW14.List0RefID3FieldParity = params->List0RefID3FieldParity;
1176 cmd.DW14.List0RefID4FieldParity = params->List0RefID4FieldParity;
1177 cmd.DW14.List0RefID5FieldParity = params->List0RefID5FieldParity;
1178 cmd.DW14.List0RefID6FieldParity = params->List0RefID6FieldParity;
1179 cmd.DW14.List0RefID7FieldParity = params->List0RefID7FieldParity;
1180 }
1181 if (m_pictureCodingType == B_TYPE)
1182 {
1183 cmd.DW14.List1RefID0FieldParity = params->List1RefID0FieldParity;
1184 cmd.DW14.List1RefID1FieldParity = params->List1RefID1FieldParity;
1185 }
1186 }
1187
1188 cmd.DW15.MvShiftFactor = mvShiftFactor;
1189 cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
1190
1191 // r3 & r4
1192 uint8_t targetUsage = params->TargetUsage;
1193 uint8_t meMethod = 0;
1194 if (m_pictureCodingType == B_TYPE)
1195 {
1196 meMethod = params->pBMEMethodTable ?
1197 params->pBMEMethodTable[targetUsage]
1198 : m_bMeMethodGeneric[targetUsage];
1199 }
1200 else
1201 {
1202 meMethod = params->pMEMethodTable ?
1203 params->pMEMethodTable[targetUsage]
1204 : m_meMethodGeneric[targetUsage];
1205 }
1206
1207 uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
1208 eStatus = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t));
1209 if (eStatus != MOS_STATUS_SUCCESS)
1210 {
1211 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
1212 return eStatus;
1213 }
1214
1215 // Non legacy stream in is for hevc vp9 streamin kernel
1216 if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1217 {
1218 //StreamIn CURBE
1219 cmd.DW6.LCUSize = 1;//Only LCU64 supported by the VDEnc HW
1220 cmd.DW6.InputStreamInSurfaceEnable = params->segmapProvided;
1221 cmd.DW31.MaxCuSize = 3;
1222 cmd.DW31.MaxTuSize = 3;
1223 cmd.DW31.NumImePredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
1224 switch (params->TargetUsage)
1225 {
1226 case 1: // Quality mode
1227 case 2:
1228 case 4: // Normal mode
1229 cmd.DW36.NumMergeCandidateCu64x64 = 4;
1230 cmd.DW36.NumMergeCandidateCu32x32 = 3;
1231 cmd.DW36.NumMergeCandidateCu16x16 = 2;
1232 cmd.DW36.NumMergeCandidateCu8x8 = 1;
1233 break;
1234 case 7: // Speed mode
1235 cmd.DW36.NumMergeCandidateCu64x64 = 2;
1236 cmd.DW36.NumMergeCandidateCu32x32 = 2;
1237 cmd.DW36.NumMergeCandidateCu16x16 = 2;
1238 cmd.DW36.NumMergeCandidateCu8x8 = 0;
1239 cmd.DW31.NumImePredictors = CODECHAL_VDENC_NUMIMEPREDICTORS_SPEED;
1240 break;
1241 default:
1242 MHW_ASSERTMESSAGE("Invalid TU provided!");
1243 return MOS_STATUS_INVALID_PARAMETER;
1244 }
1245 }
1246
1247 // r5
1248 cmd.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G12;
1249 cmd.DW41._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ?
1250 CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G12 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G12;
1251 cmd.DW42._4xMeOutputDistSurfIndex = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G12;
1252 cmd.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_ENCODE_ME_BRC_DISTORTION_G12;
1253 cmd.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G12;
1254 cmd.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G12;
1255 cmd.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G12;
1256 cmd.DW47.VDEncStreamInInputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G12;
1257
1258 CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
1259 &cmd,
1260 params->pKernelState->dwCurbeOffset,
1261 sizeof(cmd)));
1262
1263 return eStatus;
1264 }
1265
SendMeSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,MeSurfaceParams * params)1266 MOS_STATUS CodechalVdencVp9StateG12::SendMeSurfaces(
1267 PMOS_COMMAND_BUFFER cmdBuffer,
1268 MeSurfaceParams* params)
1269 {
1270 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1271
1272 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1273 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1274 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
1275 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
1276 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
1277 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
1278
1279 if (!params->bVdencStreamInEnabled)
1280 {
1281 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
1282 }
1283 else
1284 {
1285 CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeVdencStreamInBuffer);
1286 }
1287
1288 CODECHAL_MEDIA_STATE_TYPE encMediaStateType = (params->b32xMeInUse) ? CODECHAL_MEDIA_STATE_32X_ME :
1289 params->b16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
1290
1291 if (params->bVdencStreamInEnabled && encMediaStateType == CODECHAL_MEDIA_STATE_4X_ME)
1292 {
1293 encMediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
1294 }
1295
1296 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
1297 MeKernelBindingTable* meBindingTable = params->pMeBindingTable;
1298
1299 bool isFieldPicture = CodecHal_PictureIsField(*(params->pCurrOriginalPic)) ? 1 : 0;
1300 bool isBottomField = CodecHal_PictureIsBottomField(*(params->pCurrOriginalPic)) ? 1 : 0;
1301 uint8_t currVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
1302 ((isBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1303
1304 PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
1305 uint32_t meMvBottomFieldOffset = 0, currScaledBottomFieldOffset = 0;
1306 if (params->b32xMeInUse)
1307 {
1308 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
1309 currScaledSurface = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1310 meMvDataBuffer = params->ps32xMeMvDataBuffer;
1311 meMvBottomFieldOffset = params->dw32xMeMvBottomFieldOffset;
1312 currScaledBottomFieldOffset = params->dw32xScaledBottomFieldOffset;
1313 }
1314 else if (params->b16xMeInUse)
1315 {
1316 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
1317 currScaledSurface = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1318 meMvDataBuffer = params->ps16xMeMvDataBuffer;
1319 meMvBottomFieldOffset = params->dw16xMeMvBottomFieldOffset;
1320 currScaledBottomFieldOffset = params->dw16xScaledBottomFieldOffset;
1321 }
1322 else
1323 {
1324 currScaledSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1325 meMvDataBuffer = params->ps4xMeMvDataBuffer;
1326 meMvBottomFieldOffset = params->dw4xMeMvBottomFieldOffset;
1327 currScaledBottomFieldOffset = params->dw4xScaledBottomFieldOffset;
1328 }
1329
1330 // Reference height and width information should be taken from the current scaled surface rather
1331 // than from the reference scaled surface in the case of PAFF.
1332
1333 uint32_t width = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
1334 uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
1335
1336 // Force the values
1337 meMvDataBuffer->dwWidth = width;
1338 meMvDataBuffer->dwHeight = height;
1339 meMvDataBuffer->dwPitch = width;
1340
1341 CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
1342 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1343 surfaceParams.bIs2DSurface = true;
1344 surfaceParams.bMediaBlockRW = true;
1345 surfaceParams.psSurface = meMvDataBuffer;
1346 surfaceParams.dwOffset = meMvBottomFieldOffset;
1347 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
1348 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEMVDataSurface;
1349 surfaceParams.bIsWritable = true;
1350 surfaceParams.bRenderTarget = true;
1351 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1352 m_hwInterface,
1353 cmdBuffer,
1354 &surfaceParams,
1355 params->pKernelState));
1356
1357 if (params->b16xMeInUse && params->b32xMeEnabled)
1358 {
1359 // Pass 32x MV to 16x ME operation
1360 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1361 surfaceParams.bIs2DSurface = true;
1362 surfaceParams.bMediaBlockRW = true;
1363 surfaceParams.psSurface = params->ps32xMeMvDataBuffer;
1364 surfaceParams.dwOffset =
1365 isBottomField ? params->dw32xMeMvBottomFieldOffset : 0;
1366 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
1367 surfaceParams.dwBindingTableOffset = meBindingTable->dw32xMEMVDataSurface;
1368 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1369 m_hwInterface,
1370 cmdBuffer,
1371 &surfaceParams,
1372 params->pKernelState));
1373 }
1374 else if (!params->b32xMeInUse && params->b16xMeEnabled)
1375 {
1376 // Pass 16x MV to 4x ME operation
1377 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1378 surfaceParams.bIs2DSurface = true;
1379 surfaceParams.bMediaBlockRW = true;
1380 surfaceParams.psSurface = params->ps16xMeMvDataBuffer;
1381 surfaceParams.dwOffset =
1382 isBottomField ? params->dw16xMeMvBottomFieldOffset : 0;
1383 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
1384 surfaceParams.dwBindingTableOffset = meBindingTable->dw16xMEMVDataSurface;
1385 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1386 m_hwInterface,
1387 cmdBuffer,
1388 &surfaceParams,
1389 params->pKernelState));
1390 }
1391
1392 // Insert Distortion buffers only for 4xMe case
1393 if (!params->b32xMeInUse && !params->b16xMeInUse)
1394 {
1395 if (!params->bVdencStreamInEnabled)
1396 {
1397 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1398 surfaceParams.bIs2DSurface = true;
1399 surfaceParams.bMediaBlockRW = true;
1400 surfaceParams.psSurface = params->psMeBrcDistortionBuffer;
1401 surfaceParams.dwOffset = params->dwMeBrcDistortionBottomFieldOffset;
1402 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBRCDist;
1403 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1404 surfaceParams.bIsWritable = true;
1405 surfaceParams.bRenderTarget = true;
1406 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1407 m_hwInterface,
1408 cmdBuffer,
1409 &surfaceParams,
1410 params->pKernelState));
1411 }
1412
1413 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1414 surfaceParams.bIs2DSurface = true;
1415 surfaceParams.bMediaBlockRW = true;
1416 surfaceParams.psSurface = params->psMeDistortionBuffer;
1417 surfaceParams.dwOffset = params->dwMeDistortionBottomFieldOffset;
1418 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEDist;
1419 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
1420 surfaceParams.bIsWritable = true;
1421 surfaceParams.bRenderTarget = true;
1422 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1423 m_hwInterface,
1424 cmdBuffer,
1425 &surfaceParams,
1426 params->pKernelState));
1427 }
1428
1429 // Setup references 1...n
1430 // LIST 0 references
1431 CODEC_PICTURE refPic;
1432 bool isRefFieldPicture = false, isRefBottomField = false;
1433 uint8_t refPicIdx = 0;
1434 if (params->pL0RefFrameList)
1435 {
1436 for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
1437 {
1438 refPic = params->pL0RefFrameList[refIdx];
1439
1440 if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
1441 {
1442 if (refIdx == 0)
1443 {
1444 // Current Picture Y - VME
1445 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1446 surfaceParams.bUseAdvState = true;
1447 surfaceParams.psSurface = currScaledSurface;
1448 surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
1449 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1450 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForFwdRef;
1451 surfaceParams.ucVDirection = currVDirection;
1452 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1453 m_hwInterface,
1454 cmdBuffer,
1455 &surfaceParams,
1456 params->pKernelState));
1457 }
1458
1459 isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
1460 isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
1461 refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
1462 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
1463 uint32_t refScaledBottomFieldOffset = 0;
1464 MOS_SURFACE *refScaledSurface;
1465 if (params->b32xMeInUse)
1466 {
1467 refScaledSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
1468 }
1469 else if (params->b16xMeInUse)
1470 {
1471 refScaledSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
1472 }
1473 else
1474 {
1475 refScaledSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
1476 }
1477 refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
1478
1479 // L0 Reference Picture Y - VME
1480 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1481 surfaceParams.bUseAdvState = true;
1482 surfaceParams.psSurface = refScaledSurface;
1483 surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
1484 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1485 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx];
1486 surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME :
1487 ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1488 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1489 m_hwInterface,
1490 cmdBuffer,
1491 &surfaceParams,
1492 params->pKernelState));
1493
1494 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx] + 1;
1495 surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME :
1496 ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1497 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1498 m_hwInterface,
1499 cmdBuffer,
1500 &surfaceParams,
1501 params->pKernelState));
1502 }
1503 }
1504 }
1505
1506 // Setup references 1...n
1507 // LIST 1 references
1508 if (params->pL1RefFrameList)
1509 {
1510 for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
1511 {
1512 refPic = params->pL1RefFrameList[refIdx];
1513
1514 if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
1515 {
1516 if (refIdx == 0)
1517 {
1518 // Current Picture Y - VME
1519 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1520 surfaceParams.bUseAdvState = true;
1521 surfaceParams.psSurface = currScaledSurface;
1522 surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
1523 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1524 surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForBwdRef;
1525 surfaceParams.ucVDirection = currVDirection;
1526 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1527 m_hwInterface,
1528 cmdBuffer,
1529 &surfaceParams,
1530 params->pKernelState));
1531 }
1532
1533 isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
1534 isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
1535 refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
1536 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
1537 uint32_t refScaledBottomFieldOffset = 0;
1538 MOS_SURFACE *refScaledSurface;
1539 if (params->b32xMeInUse)
1540 {
1541 refScaledSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
1542 }
1543 else if (params->b16xMeInUse)
1544 {
1545 refScaledSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
1546 }
1547 else
1548 {
1549 refScaledSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
1550 }
1551 refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
1552
1553
1554 // L1 Reference Picture Y - VME
1555 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1556 surfaceParams.bUseAdvState = true;
1557 surfaceParams.psSurface = refScaledSurface;
1558 surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
1559 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1560 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBwdRefPicIdx[refIdx];
1561 surfaceParams.ucVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
1562 ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1563 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1564 m_hwInterface,
1565 cmdBuffer,
1566 &surfaceParams,
1567 params->pKernelState));
1568 }
1569 }
1570 }
1571
1572 if (encMediaStateType == CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN)
1573 {
1574 // Output buffer
1575 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1576 surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
1577 surfaceParams.bIs2DSurface = false;
1578 surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
1579 surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInSurface;
1580 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1581 surfaceParams.bIsWritable = true;
1582 surfaceParams.bRenderTarget = true;
1583 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1584 m_hwInterface,
1585 cmdBuffer,
1586 &surfaceParams,
1587 params->pKernelState));
1588
1589 // Input buffer (for AVC case we only read the surface and update data)
1590 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1591 surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
1592 surfaceParams.bIs2DSurface = false;
1593 surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
1594 surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInInputSurface;
1595 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1596 surfaceParams.bIsWritable = true;
1597 surfaceParams.bRenderTarget = true;
1598 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1599 m_hwInterface,
1600 cmdBuffer,
1601 &surfaceParams,
1602 params->pKernelState));
1603 }
1604
1605 return eStatus;
1606 }
1607
InitInterface()1608 MOS_STATUS CodechalVdencVp9StateG12::InitInterface()
1609 {
1610 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1611
1612 CODECHAL_ENCODE_FUNCTION_ENTER;
1613
1614 uint8_t* binary = nullptr;
1615 uint32_t kernelSize = 0;
1616 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
1617 m_kernelBase,
1618 m_kuidCommon,
1619 &binary,
1620 &kernelSize));
1621
1622 GetHwInterface()->GetStateHeapSettings()->dwIshSize +=
1623 MOS_ALIGN_CEIL(kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
1624
1625 return eStatus;
1626 }
1627
SetMeSurfaceParams(MeSurfaceParams * meSurfaceParams)1628 MOS_STATUS CodechalVdencVp9StateG12::SetMeSurfaceParams(MeSurfaceParams *meSurfaceParams)
1629 {
1630 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1631
1632 CODECHAL_ENCODE_FUNCTION_ENTER;
1633
1634 CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
1635
1636 meSurfaceParams->bMbaff = false;
1637 meSurfaceParams->b4xMeDistortionBufferSupported = true;
1638 meSurfaceParams->dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
1639 meSurfaceParams->dwNumRefIdxL1ActiveMinus1 = 0;
1640
1641 MOS_ZeroMemory(&m_refPicList0, sizeof(m_refPicList0));
1642
1643 if (m_lastRefPic)
1644 {
1645 m_refPicList0[0].FrameIdx = m_vp9PicParams->RefFlags.fields.LastRefIdx;
1646 m_refPicList0[0].PicFlags = PICTURE_FRAME;
1647 }
1648 if (m_goldenRefPic)
1649 {
1650 m_refPicList0[1].FrameIdx = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
1651 m_refPicList0[1].PicFlags = PICTURE_FRAME;
1652 }
1653 if (m_altRefPic)
1654 {
1655 m_refPicList0[2].FrameIdx = m_vp9PicParams->RefFlags.fields.AltRefIdx;
1656 m_refPicList0[2].PicFlags = PICTURE_FRAME;
1657 }
1658
1659 meSurfaceParams->pL0RefFrameList = &(m_refPicList0[0]);
1660 meSurfaceParams->ppRefList = &m_refList[0];
1661 meSurfaceParams->pPicIdx = &m_picIdx[0];
1662 meSurfaceParams->pCurrOriginalPic = &m_currOriginalPic;
1663 meSurfaceParams->ps4xMeMvDataBuffer = &m_4xMeMvDataBuffer;
1664 meSurfaceParams->ps16xMeMvDataBuffer = &m_16xMeMvDataBuffer;
1665 meSurfaceParams->psMeDistortionBuffer = &m_4xMeDistortionBuffer;
1666 meSurfaceParams->dwVerticalLineStride = m_verticalLineStride;
1667 meSurfaceParams->dwVerticalLineStrideOffset = m_verticalLineStrideOffset;
1668 meSurfaceParams->b32xMeEnabled = m_32xMeSupported;
1669 meSurfaceParams->b16xMeEnabled = m_16xMeEnabled;
1670 meSurfaceParams->pMeBindingTable = &m_meBindingTable;
1671 meSurfaceParams->bVdencStreamInEnabled = true;
1672 meSurfaceParams->psMeVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1673 meSurfaceParams->dwVDEncStreamInSurfaceSize = MOS_BYTES_TO_DWORDS((MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
1674 (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
1675 CODECHAL_CACHELINE_SIZE);
1676 return eStatus;
1677 }
1678
SetMeCurbeParams(MeCurbeParams * meParams)1679 MOS_STATUS CodechalVdencVp9StateG12::SetMeCurbeParams(MeCurbeParams *meParams)
1680 {
1681 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1682
1683 CODECHAL_ENCODE_FUNCTION_ENTER;
1684
1685 CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
1686
1687 meParams->b16xMeEnabled = m_16xMeEnabled;
1688 meParams->b32xMeEnabled = m_32xMeSupported;
1689 meParams->TargetUsage = TU_QUALITY;
1690 meParams->MaxMvLen = m_hmeMaxMvLength;
1691 meParams->CurrOriginalPic.FrameIdx = m_vp9PicParams->CurrOriginalPic.FrameIdx;
1692 meParams->CurrOriginalPic.PicEntry = m_vp9PicParams->CurrOriginalPic.PicEntry;
1693 meParams->CurrOriginalPic.PicFlags = m_vp9PicParams->CurrOriginalPic.PicFlags;
1694 meParams->pic_init_qp_minus26 = m_vp9PicParams->LumaACQIndex - 26;
1695 meParams->num_ref_idx_l0_active_minus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
1696 meParams->num_ref_idx_l1_active_minus1 = 0;
1697
1698 return eStatus;
1699 }
1700
ExecuteMeKernel(MeCurbeParams * meParams,MeSurfaceParams * meSurfaceParams,HmeLevel hmeLevel)1701 MOS_STATUS CodechalVdencVp9StateG12::ExecuteMeKernel(
1702 MeCurbeParams *meParams,
1703 MeSurfaceParams *meSurfaceParams,
1704 HmeLevel hmeLevel)
1705 {
1706 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1707
1708 CODECHAL_ENCODE_FUNCTION_ENTER;
1709
1710 CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
1711 CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
1712
1713 PerfTagSetting perfTag;
1714 perfTag.Value = 0;
1715 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1716 perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL;
1717 perfTag.PictureCodingType = m_pictureCodingType;
1718 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1719 // Each ME kernel buffer counts as a separate perf task
1720 m_osInterface->pfnResetPerfBufferID(m_osInterface);
1721
1722 CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
1723 (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
1724
1725 bool vdencMeInUse = false;
1726 if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME))
1727 {
1728 vdencMeInUse = true;
1729 // Non legacy stream in is for hevc vp9 streamin kernel
1730 encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
1731 }
1732
1733 uint32_t krnStateIdx = vdencMeInUse ?
1734 CODECHAL_ENCODE_ME_IDX_VDENC :
1735 ((m_pictureCodingType == P_TYPE) ? CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B);
1736
1737 PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx];
1738
1739 // If Single Task Phase is not enabled, use BT count for the kernel state.
1740 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
1741 {
1742 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
1743 m_maxBtCount : kernelState->KernelParams.iBTCount;
1744 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
1745 m_stateHeapInterface,
1746 maxBtCount));
1747 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1748 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
1749 }
1750
1751 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1752 m_stateHeapInterface,
1753 kernelState,
1754 false,
1755 0,
1756 false,
1757 m_storeData));
1758 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1759 MOS_ZeroMemory(&idParams, sizeof(idParams));
1760 idParams.pKernelState = kernelState;
1761 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
1762 m_stateHeapInterface,
1763 1,
1764 &idParams));
1765
1766 // Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here)
1767 meParams->hmeLvl = hmeLevel;
1768 meParams->pKernelState = kernelState;
1769
1770 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMe(meParams));
1771
1772 CODECHAL_DEBUG_TOOL(
1773 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1774 encFunctionType,
1775 MHW_DSH_TYPE,
1776 kernelState));
1777 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1778 encFunctionType,
1779 kernelState));
1780 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1781 encFunctionType,
1782 MHW_ISH_TYPE,
1783 kernelState));
1784 )
1785 MOS_COMMAND_BUFFER cmdBuffer;
1786 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1787 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1788 SendKernelCmdsParams sendKernelCmdsParams;
1789 sendKernelCmdsParams = SendKernelCmdsParams();
1790 sendKernelCmdsParams.EncFunctionType = encFunctionType;
1791 sendKernelCmdsParams.pKernelState = kernelState;
1792
1793 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1794
1795 // Add binding table
1796 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
1797 m_stateHeapInterface,
1798 kernelState));
1799
1800 // Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here)
1801 meSurfaceParams->dwDownscaledWidthInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x :
1802 (hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x;
1803 meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x :
1804 (hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x;
1805 meSurfaceParams->b32xMeInUse = (hmeLevel == HME_LEVEL_32x) ? true : false;
1806 meSurfaceParams->b16xMeInUse = (hmeLevel == HME_LEVEL_16x) ? true : false;
1807 meSurfaceParams->pKernelState = kernelState;
1808
1809 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(&cmdBuffer, meSurfaceParams));
1810
1811 // Dump SSH for ME kernel
1812 CODECHAL_DEBUG_TOOL(
1813 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1814 encFunctionType,
1815 MHW_SSH_TYPE,
1816 kernelState)));
1817
1818 /* zero out the mv data memory and me distortion buffer for the driver ULT
1819 kernel only writes out this data used for current frame, in some cases the the data used for
1820 previous frames would be left in the buffer (for example, the L1 mv for B frame would still show
1821 in the P frame mv data buffer */
1822
1823 // Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled
1824 CODECHAL_DEBUG_TOOL(
1825 CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface);
1826 uint8_t* data = NULL;
1827 uint32_t size = 0;
1828 bool driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType);
1829
1830 if (driverMeDumpEnabled)
1831 {
1832 MOS_LOCK_PARAMS lockFlags;
1833 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1834 lockFlags.WriteOnly = 1;
1835
1836 switch (hmeLevel)
1837 {
1838 case HME_LEVEL_32x:
1839 data = (uint8_t*)m_osInterface->pfnLockResource(
1840 m_osInterface,
1841 &meSurfaceParams->ps32xMeMvDataBuffer->OsResource,
1842 &lockFlags);
1843 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1844 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) *
1845 (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1846 MOS_ZeroMemory(data, size);
1847 m_osInterface->pfnUnlockResource(
1848 m_osInterface,
1849 &meSurfaceParams->ps32xMeMvDataBuffer->OsResource);
1850 break;
1851 case HME_LEVEL_16x:
1852 data = (uint8_t*)m_osInterface->pfnLockResource(
1853 m_osInterface,
1854 &meSurfaceParams->ps16xMeMvDataBuffer->OsResource,
1855 &lockFlags);
1856 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1857 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) *
1858 (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1859 MOS_ZeroMemory(data, size);
1860 m_osInterface->pfnUnlockResource(
1861 m_osInterface,
1862 &meSurfaceParams->ps16xMeMvDataBuffer->OsResource);
1863 break;
1864 case HME_LEVEL_4x:
1865 if (!m_vdencEnabled)
1866 {
1867 data = (uint8_t*)m_osInterface->pfnLockResource(
1868 m_osInterface,
1869 &meSurfaceParams->ps4xMeMvDataBuffer->OsResource,
1870 &lockFlags);
1871 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1872 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) *
1873 (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1874 MOS_ZeroMemory(data, size);
1875 m_osInterface->pfnUnlockResource(
1876 m_osInterface,
1877 &meSurfaceParams->ps4xMeMvDataBuffer->OsResource);
1878 }
1879 break;
1880 default:
1881 return MOS_STATUS_INVALID_PARAMETER;
1882 }
1883
1884 // zeroing out ME dist buffer
1885 if (meSurfaceParams->b4xMeDistortionBufferSupported)
1886 {
1887 data = (uint8_t*)m_osInterface->pfnLockResource(
1888 m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags);
1889 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1890 size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch;
1891 MOS_ZeroMemory(data, size);
1892 m_osInterface->pfnUnlockResource(
1893 m_osInterface,
1894 &meSurfaceParams->psMeDistortionBuffer->OsResource);
1895 }
1896 }
1897 );
1898
1899 uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
1900 (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
1901
1902 uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
1903 uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
1904
1905 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
1906 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
1907 walkerCodecParams.WalkerMode = m_walkerMode;
1908 walkerCodecParams.dwResolutionX = resolutionX;
1909 walkerCodecParams.dwResolutionY = resolutionY;
1910 walkerCodecParams.bNoDependency = true;
1911 walkerCodecParams.bMbaff = meSurfaceParams->bMbaff;
1912 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
1913 walkerCodecParams.ucGroupId = m_groupId;
1914
1915 MHW_WALKER_PARAMS walkerParams;
1916 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
1917 m_hwInterface,
1918 &walkerParams,
1919 &walkerCodecParams));
1920
1921 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
1922 &cmdBuffer,
1923 &walkerParams));
1924
1925 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
1926
1927 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
1928 m_stateHeapInterface,
1929 kernelState));
1930
1931 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1932
1933 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1934 {
1935 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
1936 m_stateHeapInterface));
1937 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1938 }
1939
1940 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1941 &cmdBuffer,
1942 encFunctionType,
1943 nullptr)));
1944
1945 m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
1946
1947 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1948
1949 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1950 {
1951 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1952 m_lastTaskInPhase = false;
1953 }
1954
1955 return eStatus;
1956 }
1957
ExecuteKernelFunctions()1958 MOS_STATUS CodechalVdencVp9StateG12::ExecuteKernelFunctions()
1959 {
1960 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1961
1962 CODECHAL_ENCODE_FUNCTION_ENTER;
1963
1964 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
1965 uint32_t dumpFormat = 0;
1966 CODECHAL_DEBUG_TOOL(
1967 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_rawSurfaceToEnc->Format, &dumpFormat);
1968 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1969 m_rawSurfaceToEnc,
1970 CodechalDbgAttr::attrEncodeRawInputSurface,
1971 "SrcSurf"));
1972
1973 if (m_lastRefPic)
1974 {
1975 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_lastRefPic->Format, &dumpFormat);
1976 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1977 m_lastRefPic,
1978 CodechalDbgAttr::attrReferenceSurfaces,
1979 "LastRefSurface"));
1980 }
1981
1982 if (m_goldenRefPic)
1983 {
1984 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_goldenRefPic->Format, &dumpFormat);
1985 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1986 m_goldenRefPic,
1987 CodechalDbgAttr::attrReferenceSurfaces,
1988 "GoldenRefSurface"));
1989 }
1990
1991 if (m_altRefPic)
1992 {
1993 // CodecHal_DbgMapSurfaceFormatToDumpFormat(m_altRefPic->Format, &dumpFormat);
1994 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1995 m_altRefPic,
1996 CodechalDbgAttr::attrReferenceSurfaces,
1997 "_AltRefSurface"));
1998 }
1999 );
2000
2001 m_setRequestedEUSlices = ((m_frameHeight * m_frameWidth) >= m_ssdResolutionThreshold &&
2002 m_targetUsage <= m_ssdTargetUsageThreshold) ? true : false;
2003
2004 m_hwInterface->m_numRequestedEuSlices = (m_setRequestedEUSlices) ?
2005 m_sliceShutdownRequestState : m_sliceShutdownDefaultState;
2006
2007 // While this streamin isn't a kernel function, we 0 the surface here which is needed before HME kernel
2008 SetupSegmentationStreamIn();
2009 if (m_16xMeSupported)
2010 {
2011 //4x Downscaling
2012 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
2013 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
2014 cscScalingKernelParams.bLastTaskInPhaseCSC =
2015 cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled);
2016 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
2017 cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
2018
2019 m_firstTaskInPhase = true;
2020 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
2021 }
2022
2023 if (m_16xMeEnabled)
2024 {
2025 //Initialize the ME struct for HME kernel calls
2026 MeCurbeParams meParams;
2027 MOS_ZeroMemory(&meParams, sizeof(MeCurbeParams));
2028 SetMeCurbeParams(&meParams);
2029
2030 MeSurfaceParams meSurfaceParams;
2031 MOS_ZeroMemory(&meSurfaceParams, sizeof(MeSurfaceParams));
2032 SetMeSurfaceParams(&meSurfaceParams);
2033
2034 // P_HME kernel (16x HME)
2035 m_lastTaskInPhase = false;
2036 CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_16x));
2037
2038 //StreamIn kernel, 4xME
2039 m_lastTaskInPhase = true;
2040 meParams.segmapProvided = m_segmentMapProvided;
2041 CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_4x));
2042 }
2043
2044 if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2045 {
2046 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2047 syncParams.GpuContext = m_renderContext;
2048 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2049
2050 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2051 m_waitForEnc = true;
2052 }
2053 #endif
2054
2055 return eStatus;
2056 }
2057
StatusReportCleanup(EncodeStatusReport * encodeStatusReport,HCPPakHWTileSizeRecord_G12 * tileStatusReport,CODECHAL_ENCODE_BUFFER * tileSizeStreamoutBuffer,PMOS_INTERFACE osInterface,uint8_t * tempBsBuffer,uint8_t * bitstream)2058 static void StatusReportCleanup(
2059 EncodeStatusReport* encodeStatusReport,
2060 HCPPakHWTileSizeRecord_G12* tileStatusReport,
2061 CODECHAL_ENCODE_BUFFER* tileSizeStreamoutBuffer,
2062 PMOS_INTERFACE osInterface,
2063 uint8_t* tempBsBuffer,
2064 uint8_t* bitstream)
2065 {
2066
2067 if (tempBsBuffer)
2068 {
2069 MOS_FreeMemory(tempBsBuffer);
2070 }
2071
2072 if (bitstream)
2073 {
2074 osInterface->pfnUnlockResource(osInterface, &encodeStatusReport->pCurrRefList->resBitstreamBuffer);
2075 }
2076
2077 if (tileStatusReport)
2078 {
2079 // clean-up the tile status report buffer
2080 if (encodeStatusReport->CodecStatus == CODECHAL_STATUS_SUCCESSFUL)
2081 {
2082 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2083 {
2084 MOS_ZeroMemory(&tileStatusReport[i], sizeof(tileStatusReport[i]));
2085 }
2086 }
2087
2088 osInterface->pfnUnlockResource(osInterface, &tileSizeStreamoutBuffer->sResource);
2089 }
2090 }
2091
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)2092 MOS_STATUS CodechalVdencVp9StateG12::GetStatusReport(
2093 EncodeStatus* encodeStatus,
2094 EncodeStatusReport* encodeStatusReport)
2095 {
2096 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2097
2098 CODECHAL_ENCODE_FUNCTION_ENTER;
2099
2100 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
2101 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
2102
2103 if (encodeStatusReport->UsedVdBoxNumber == 1)
2104 {
2105 encodeStatusReport->bitstreamSize = encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted;
2106 encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses;
2107 ENCODE_VERBOSEMESSAGE("statusReportData->numberPasses: %d\n", encodeStatusReport->NumberPasses);
2108 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
2109 return eStatus;
2110 }
2111
2112 // Tile record always in m_tileRecordBuffer even in scala mode
2113 PCODECHAL_ENCODE_BUFFER presTileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
2114
2115 MOS_LOCK_PARAMS lockFlags;
2116 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2117 HCPPakHWTileSizeRecord_G12* tileStatusReport = (HCPPakHWTileSizeRecord_G12*)m_osInterface->pfnLockResource(
2118 m_osInterface,
2119 &presTileSizeStatusReport->sResource,
2120 &lockFlags);
2121 CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
2122
2123 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
2124 encodeStatusReport->PanicMode = false;
2125 encodeStatusReport->AverageQp = 0;
2126 encodeStatusReport->QpY = 0;
2127 encodeStatusReport->SuggestedQpYDelta = 0;
2128 encodeStatusReport->NumberPasses = 1;
2129 encodeStatusReport->bitstreamSize = 0;
2130 encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
2131
2132 double sum_qp = 0.0;
2133 uint32_t totalCU = 0;
2134 CODECHAL_ENCODE_CHK_COND_RETURN((encodeStatusReport->NumberTilesInFrame == 0), "ERROR - invalid number of tiles in frame");
2135 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2136 {
2137 if (tileStatusReport[i].Length == 0)
2138 {
2139 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
2140 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
2141 return eStatus;
2142 }
2143
2144 encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
2145 totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
2146 sum_qp += tileStatusReport[i].Hcp_Qp_Status_Count;
2147 }
2148
2149 encodeStatusReport->QpY = encodeStatusReport->AverageQp =
2150 (uint8_t)((sum_qp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
2151
2152 if (m_enableTileStitchByHW)
2153 {
2154 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
2155 return eStatus;
2156 }
2157
2158 uint8_t* bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
2159 uint8_t* tempBsBuffer = bufPtr;
2160 CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
2161
2162 CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
2163 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2164 lockFlags.ReadOnly = 1;
2165 uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
2166 m_osInterface,
2167 &currRefList.resBitstreamBuffer,
2168 &lockFlags);
2169 if (bitstream == nullptr)
2170 {
2171 MOS_SafeFreeMemory(tempBsBuffer);
2172 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
2173 }
2174
2175 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2176 {
2177 uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
2178 uint32_t len = tileStatusReport[i].Length;
2179
2180 if (offset + len >= m_bitstreamUpperBound)
2181 {
2182 eStatus = MOS_STATUS_INVALID_FILE_SIZE;
2183 CODECHAL_ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
2184 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
2185 encodeStatusReport->bitstreamSize = 0;
2186 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
2187 return eStatus;
2188 }
2189
2190 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
2191 bufPtr += len;
2192 }
2193
2194 MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
2195 MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
2196 m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
2197
2198 StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
2199
2200 return eStatus;
2201 }
2202
DecideEncodingPipeNumber()2203 MOS_STATUS CodechalVdencVp9StateG12::DecideEncodingPipeNumber()
2204 {
2205 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2206
2207 CODECHAL_ENCODE_FUNCTION_ENTER;
2208
2209 m_numPipe = m_numVdbox;
2210
2211 uint8_t num_tile_columns = (1 << m_vp9PicParams->log2_tile_columns);
2212
2213 if (num_tile_columns > m_numPipe)
2214 {
2215 m_numPipe = 1;
2216 }
2217
2218 if (num_tile_columns < m_numPipe)
2219 {
2220 if (num_tile_columns >= 1 && num_tile_columns <= 4)
2221 {
2222 m_numPipe = num_tile_columns;
2223 }
2224 else
2225 {
2226 m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
2227 }
2228 }
2229
2230 if (m_numPipe == 0 || m_numPipe > CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE)
2231 {
2232 m_numPipe = 1;
2233 }
2234
2235 if (m_numPipe > 1)
2236 {
2237 m_scalableMode = true; // KMD VE is now enabled by default. Mediasolo can also use the VE interface.
2238 }
2239 else
2240 {
2241 m_scalableMode = false;
2242 }
2243
2244 if (m_scalabilityState)
2245 {
2246 // Create/ re-use a GPU context with 2 pipes
2247 m_scalabilityState->ucScalablePipeNum = m_numPipe;
2248 }
2249
2250 return eStatus;
2251 }
2252
PlatformCapabilityCheck()2253 MOS_STATUS CodechalVdencVp9StateG12::PlatformCapabilityCheck()
2254 {
2255 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2256
2257 CODECHAL_ENCODE_FUNCTION_ENTER;
2258
2259 CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
2260
2261 if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2262 {
2263 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
2264 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2265 }
2266
2267
2268 if (m_numPipe > 1)
2269 {
2270 m_singleTaskPhaseSupported = m_singleTaskPhaseSupportedInPak = false;
2271 }
2272
2273 //so far only validate Tiling for VDEnc VP9
2274 uint8_t col = (1 << (m_vp9PicParams->log2_tile_columns));
2275 uint8_t row = (1 << (m_vp9PicParams->log2_tile_rows));
2276
2277 // Handling invalid tiling and scalability cases. When NumTilingColumn does not match NumPipe fall back to single pipe mode
2278 if (m_numPipe > 1 && (col != m_numPipe))
2279 {
2280 if ((col == 1) || (row == 1))
2281 {
2282 m_numPipe = 1; // number of tile columns cannot be greater than number of pipes (VDBOX), run in single pipe mode
2283 m_scalableMode = false;
2284 }
2285 else
2286 {
2287 CODECHAL_ENCODE_ASSERTMESSAGE("Number of tile columns cannot be greater than number of pipes (VDBOX) when number of rows > 1");
2288 return MOS_STATUS_INVALID_PARAMETER;
2289 }
2290 }
2291
2292 //num columns must be either 2 or 4 for scalability mode, H/W limitation
2293 if ((m_numPipe > 1) && (m_numPipe != 2) && (m_numPipe != 4))
2294 {
2295 CODECHAL_ENCODE_ASSERTMESSAGE("Num pipes must be either 2 or 4 for scalability mode, H/W limitation");
2296 return MOS_STATUS_INVALID_PARAMETER;
2297 }
2298
2299 // Tile width needs to be minimum size 256, error out if less
2300 if ((col != 1) && ((m_vp9PicParams->SrcFrameWidthMinus1 + 1) < col * CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH))
2301 {
2302 CODECHAL_ENCODE_ASSERTMESSAGE("Incorrect number of columns input parameter, Tile width is < 256");
2303 return MOS_STATUS_INVALID_PARAMETER;
2304 }
2305
2306 if (row > 4)
2307 {
2308 CODECHAL_ENCODE_ASSERTMESSAGE("Max number of rows cannot exceeds 4 by VP9 Spec.");
2309 return MOS_STATUS_INVALID_PARAMETER;
2310 }
2311
2312 // number of tiles for this frame
2313 m_numberTilesInFrame = col * row;
2314 m_numUsedVdbox = m_numPipe;
2315
2316 if (!m_newSeq)
2317 {
2318 // If there is no new SEQ header, then the number of passes is decided here.
2319 // Otherwise, it is done in SetSequenceStructs. For example, BRC setting may be changed.
2320 m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
2321 }
2322
2323 //Last place where scalable mode is decided
2324 if(m_frameNum == 0)
2325 {
2326 m_lastFrameScalableMode = m_scalableMode;
2327 }
2328
2329 return eStatus;
2330 }
2331
SetGpuCtxCreatOption()2332 MOS_STATUS CodechalVdencVp9StateG12::SetGpuCtxCreatOption()
2333 {
2334 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2335
2336 CODECHAL_ENCODE_FUNCTION_ENTER;
2337
2338 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2339 {
2340 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
2341 }
2342 else
2343 {
2344 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
2345 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
2346
2347 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
2348 m_scalabilityState,
2349 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2350 }
2351
2352 return eStatus;
2353 }
2354
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)2355 MOS_STATUS CodechalVdencVp9StateG12::SetAndPopulateVEHintParams(
2356 PMOS_COMMAND_BUFFER cmdBuffer)
2357 {
2358 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2359
2360 CODECHAL_ENCODE_FUNCTION_ENTER;
2361
2362 if (!MOS_VE_SUPPORTED(m_osInterface))
2363 {
2364 return eStatus;
2365 }
2366
2367 CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
2368 MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
2369
2370 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2371 {
2372 scalSetParms.bNeedSyncWithPrevious = true;
2373 }
2374
2375 int32_t currentPass = GetCurrentPass();
2376 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2377 // Scalable mode only
2378 if (m_scalableMode)
2379 {
2380 for (auto i = 0; i < m_numPipe; i++)
2381 {
2382 scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex].OsResource;
2383 }
2384 }
2385
2386 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
2387
2388 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2389 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
2390
2391 return eStatus;
2392 }
2393
SetTileData()2394 MOS_STATUS CodechalVdencVp9StateG12::SetTileData()
2395 {
2396 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2397
2398 CODECHAL_ENCODE_FUNCTION_ENTER;
2399
2400 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 *tileCodingParams = m_tileParams;
2401
2402 tileCodingParams->Mode = CODECHAL_ENCODE_MODE_VP9;
2403
2404 uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
2405 uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2406 uint32_t numTiles = numTileRows * numTileColumns;
2407
2408 uint32_t bitstreamSizePerTile = m_bitstreamUpperBound / (numTiles * CODECHAL_CACHELINE_SIZE);
2409 uint32_t numLcusInTiles = 0, numCuRecord = 64;
2410 uint32_t cuLevelStreamoutOffset = 0, sliceSizeStreamoutOffset = 0, bitstreamByteOffset = 0, sseRowstoreOffset = 0;
2411
2412 for (uint32_t tileCntr = 0; tileCntr < numTiles; tileCntr++)
2413 {
2414 uint32_t tileX, tileY, tileStartSbX, tileStartSbY, tileWidthInSb, tileHeightInSb, lastTileColWidth, lastTileRowHeight, numLcuInTile;
2415 bool isLastTileCol, isLastTileRow;
2416
2417 tileX = tileCntr % numTileColumns;
2418 tileY = tileCntr / numTileColumns;
2419
2420 isLastTileCol = ((numTileColumns - 1) == tileX);
2421 isLastTileRow = ((numTileRows - 1) == tileY);
2422
2423 tileStartSbX = (tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns;
2424 tileStartSbY = (tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows;
2425
2426 tileWidthInSb = (isLastTileCol ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) - tileStartSbX;
2427 tileHeightInSb = (isLastTileRow ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) - tileStartSbY;
2428
2429 lastTileColWidth = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameWidthMinus1 + 1 - tileStartSbX * CODEC_VP9_SUPER_BLOCK_WIDTH), CODEC_VP9_MIN_BLOCK_WIDTH) / CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2430 lastTileRowHeight = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameHeightMinus1 + 1 - tileStartSbY * CODEC_VP9_SUPER_BLOCK_HEIGHT), CODEC_VP9_MIN_BLOCK_HEIGHT) / CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2431
2432 numLcuInTile = tileWidthInSb * tileHeightInSb;
2433 tileCodingParams[tileCntr].NumberOfActiveBePipes = m_numPipe;
2434 tileCodingParams[tileCntr].NumOfTilesInFrame = numTiles;
2435 tileCodingParams[tileCntr].NumOfTileColumnsInFrame = numTileColumns;
2436 tileCodingParams[tileCntr].TileStartLCUX = tileStartSbX;
2437 tileCodingParams[tileCntr].TileStartLCUY = tileStartSbY;
2438 tileCodingParams[tileCntr].IsLastTileofColumn = isLastTileRow;
2439 tileCodingParams[tileCntr].IsLastTileofRow = isLastTileCol;
2440
2441 tileCodingParams[tileCntr].TileWidthInMinCbMinus1 = isLastTileCol ? lastTileColWidth : (tileWidthInSb * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2442 tileCodingParams[tileCntr].TileHeightInMinCbMinus1 = isLastTileRow ? lastTileRowHeight : (tileHeightInSb * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2443
2444 if (m_scalableMode)
2445 {
2446 sseRowstoreOffset = (tileStartSbX + (3 * tileX)) << 5;
2447
2448 tileCodingParams[tileCntr].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * 64),
2449 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
2450 tileCodingParams[tileCntr].presHcpSyncBuffer = &m_hcpScalabilitySyncBuffer.sResource;
2451 tileCodingParams[tileCntr].SliceSizeStreamoutOffset = sliceSizeStreamoutOffset;
2452 tileCodingParams[tileCntr].SseRowstoreOffset = sseRowstoreOffset;
2453 tileCodingParams[tileCntr].BitstreamByteOffset = bitstreamByteOffset;
2454 tileCodingParams[tileCntr].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
2455
2456 cuLevelStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2457 sliceSizeStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2458 sseRowstoreOffset += (numLcuInTile * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
2459 bitstreamByteOffset += bitstreamSizePerTile;
2460 numLcusInTiles += numLcuInTile;
2461
2462 tileCodingParams[tileCntr].TileSizeStreamoutOffset = (tileCntr*m_hcpInterface->GetPakHWTileSizeRecordSize() + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2463
2464 //DW5
2465 const uint32_t frameStatsStreamoutSize = m_brcPakStatsBufSize;
2466 tileCodingParams[tileCntr].PakTileStatisticsOffset = (tileCntr*frameStatsStreamoutSize + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2467
2468 //DW12
2469 tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = ((tileCntr * m_probabilityCounterBufferSize) + (CODECHAL_CACHELINE_SIZE - 1)) / CODECHAL_CACHELINE_SIZE;
2470 }
2471 else
2472 {
2473 tileCodingParams[tileCntr].CuRecordOffset = 0;
2474 tileCodingParams[tileCntr].presHcpSyncBuffer = nullptr;
2475 tileCodingParams[tileCntr].SliceSizeStreamoutOffset = 0;
2476 tileCodingParams[tileCntr].SseRowstoreOffset = 0;
2477 tileCodingParams[tileCntr].BitstreamByteOffset = 0;
2478 tileCodingParams[tileCntr].CuLevelStreamoutOffset = 0;
2479 tileCodingParams[tileCntr].TileSizeStreamoutOffset = 0;
2480
2481 //DW5
2482 tileCodingParams[tileCntr].PakTileStatisticsOffset = 0;
2483
2484 //DW12
2485 tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = 0;
2486 }
2487 }
2488
2489 return eStatus;
2490 }
2491
SetTileCommands(PMOS_COMMAND_BUFFER cmdBuffer)2492 MOS_STATUS CodechalVdencVp9StateG12::SetTileCommands(
2493 PMOS_COMMAND_BUFFER cmdBuffer)
2494 {
2495 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2496
2497 CODECHAL_ENCODE_FUNCTION_ENTER;
2498
2499 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G12 vdencWalkerStateParams;
2500 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_VP9;
2501 vdencWalkerStateParams.pVp9EncPicParams = m_vp9PicParams;
2502 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2503
2504 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2505 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2506 // MFXPipeDone should not be set for tail insertion
2507 vdPipelineFlushParams.Flags.bWaitDoneMFX =
2508 (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
2509 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2510 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2511 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2512 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2513
2514 if (IsFirstPipe() && IsFirstPass())
2515 {
2516 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData());
2517 }
2518
2519 MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
2520 uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2521 uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
2522 int currentPipe = GetCurrentPipe();
2523 for (uint32_t tileRow = 0, tileIdx = 0; tileRow < numTileRows; tileRow++)
2524 {
2525 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++, tileIdx++)
2526 {
2527 if (m_numPipe > 1)
2528 {
2529 if (tileCol != currentPipe)
2530 {
2531 continue;
2532 }
2533 }
2534
2535 if (m_scalableMode)
2536 {
2537 MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
2538 //in scalability mode
2539 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2540 vdCtrlParam.scalableModePipeLock = true;
2541 MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
2542 CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(cmdBuffer, &vdCtrlParam));
2543 }
2544
2545 // HCP_TILE_CODING commmand
2546 CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(cmdBuffer, &m_tileParams[tileIdx]));
2547
2548 MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
2549 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(cmdBuffer, nullptr, &vdencWeightOffsetParams));
2550
2551 vdencWalkerStateParams.pTileCodingParams = &m_tileParams[tileIdx];
2552 vdencWalkerStateParams.dwTileId = tileIdx;
2553 switch (m_numPipe)
2554 {
2555 case 0:
2556 case 1:
2557 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2558 break;
2559 case 2:
2560 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
2561 break;
2562 case 4:
2563 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
2564 break;
2565 default:
2566 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
2567 CODECHAL_ENCODE_ASSERTMESSAGE("Num Pipes invalid");
2568 return eStatus;
2569 break;
2570 }
2571 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
2572
2573 if (m_scalableMode)
2574 {
2575 MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
2576 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2577 vdCtrlParam.scalableModePipeUnlock = true;
2578 MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
2579 CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(cmdBuffer, &vdCtrlParam));
2580 }
2581
2582 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipelineFlushParams));
2583 // Send MI_FLUSH command
2584 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2585 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2586 flushDwParams.bVideoPipelineCacheInvalidate = true;
2587 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2588 }
2589 }
2590
2591 return eStatus;
2592 }
2593
StoreNumPasses(EncodeStatusBuffer * encodeStatusBuf,MhwMiInterface * miInterface,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t currPass)2594 MOS_STATUS CodechalVdencVp9StateG12::StoreNumPasses(
2595 EncodeStatusBuffer *encodeStatusBuf,
2596 MhwMiInterface *miInterface,
2597 PMOS_COMMAND_BUFFER cmdBuffer,
2598 uint32_t currPass)
2599 {
2600 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2601 uint32_t offset;
2602 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2603
2604 CODECHAL_ENCODE_FUNCTION_ENTER;
2605
2606 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBuf);
2607 CODECHAL_ENCODE_CHK_NULL_RETURN(miInterface);
2608 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2609
2610 offset =
2611 (encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize) +
2612 encodeStatusBuf->dwNumPassesOffset + // Num passes offset
2613 sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
2614
2615 storeDataParams.pOsResource = &encodeStatusBuf->resStatusBuffer;
2616 storeDataParams.dwResourceOffset = offset;
2617 storeDataParams.dwValue = currPass + 1;
2618 CODECHAL_ENCODE_CHK_STATUS_RETURN(miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
2619
2620 return MOS_STATUS_SUCCESS;
2621 }
//!
//! \brief    Build and (when due) submit the tile level PAK commands of the current pipe / pass
//! \details  Sequence, in order:
//!           1. fetch the command buffer of the current pipe / pass;
//!           2. first pipe only: start the uncompressed header second level batch buffer;
//!           3. add the per-tile commands (SetTileCommands) followed by an implicit
//!              memory flush, a VD pipeline flush and an MI flush;
//!           4. first pipe: wait for the other pipes, run PAK integration / HW stitching
//!              in scalable mode, end the status report; other pipes: signal their
//!              stitch semaphore;
//!           5. store the pass count, close and return the command buffer, submit it
//!              when this is not a deferred single-task-phase submission;
//!           6. on the last pipe of the last pass, roll the per-frame state over for
//!              the next frame.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success,
//!           MOS_STATUS_INVALID_PARAMETER if the current pipe or pass is negative,
//!           otherwise the error reported by a callee
//!
MOS_STATUS CodechalVdencVp9StateG12::ExecuteTileLevel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    int currentPipe = GetCurrentPipe();
    int currentPass = GetCurrentPass();

    // Both values are used as array indices / semaphore values below.
    if (currentPipe < 0 || currentPass < 0)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));

    if (IsFirstPipe())
    {
        // Only the first pipe executes the uncompressed header second level batch buffer.
        MHW_BATCH_BUFFER secondLevelBatchBuffer;
        MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
        secondLevelBatchBuffer.dwOffset = 0;
        secondLevelBatchBuffer.bSecondLevel = true;

        // Without HuC the recycled "read" buffer is used, with HuC the "write" buffer.
        if (!m_hucEnabled)
        {
            secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
        }
        else
        {
            secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
        }
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
            &cmdBuffer,
            &secondLevelBatchBuffer));
    }

    // Setup Tile level PAK commands
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileCommands(&cmdBuffer));

    // MI_VD_CONTROL_STATE with the implicit memory flush bit set
    MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
    MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
    vdCtrlParam.memoryImplicitFlush = true;
    MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
    CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));

    // Send VD_PIPELINE_FLUSH command
    MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
    MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
    // MFXPipeDone should not be set for tail insertion
    // NOTE(review): unlike SetTileCommands(), bWaitDoneMFX is set unconditionally
    // here, so the comment above does not describe this assignment - confirm intent.
    vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
    vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
    vdPipelineFlushParams.Flags.bFlushHEVC = 1;
    vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));

    // Send MI_FLUSH command
    MHW_MI_FLUSH_DW_PARAMS flushDwParams;
    MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
    flushDwParams.bVideoPipelineCacheInvalidate = true;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));

    if (IsFirstPipe())
    {
        if (m_numPipe > 1 && m_enableTileStitchByHW)
        {
            // Pipe 0 is the one running this code, so the loop starts at pipe 1.
            for (auto i = 1; i < m_numPipe; i++)
            {
                if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource) && m_hucEnabled)
                {
                    // Wait until pipe i has finished vdenc+pak for this pass before stitching
                    // the bitstream; the expected value (currentPass + 1) matches what the
                    // non-first pipes write further below.
                    // NOTE(review): the results of both calls are not checked here.
                    SendHWWaitCommand(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, (currentPass + 1));
                    SetSemaphoreMem(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, 0);  // Reset above semaphore
                }
            }
        }
        // PAK integration kernel to integrate stats for next HUC pass
        // (the IsFirstPipe() term repeats the enclosing condition)
        if (m_scalableMode && m_hucEnabled && m_isTilingSupported && IsFirstPipe())
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9PakInt(&cmdBuffer));
            // Signal pak int done semaphore here for next pass to proceed
            if (!IsLastPass())
            {
                SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, (currentPass + 1));
            }

            if (m_enableTileStitchByHW)
            {
                // 2nd level BB buffer for stitching cmd
                // current location to add cmds in 2nd level batch buffer
                m_HucStitchCmdBatchBuffer.iCurrent = 0;
                // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
                m_HucStitchCmdBatchBuffer.dwOffset = 0;
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
                // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
            }
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));

        if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
        }
    }
    else // any pipe other than the first one
    {
        // Signal stitch command to proceed because vdenc+pak is done in this pipe and we can stitch bs
        if (m_hucEnabled && m_isTilingSupported && !Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currentPipe].sResource))
        {
            SetSemaphoreMem(&m_stitchWaitSemaphoreMem[currentPipe].sResource, &cmdBuffer, (currentPass + 1));
        }
    }

    // Plain MI flush (all flags cleared) after the pipe specific tail.
    MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
    if (m_singleTaskPhaseSupported && m_hucEnabled && IsLastPass())
    {
        m_lastTaskInPhase = true; //HPU singletask phase mode only
    }
    // Record (m_currPass + 1) in the status buffer.
    CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreNumPasses(
        &(m_encodeStatusBuf),
        m_miInterface,
        &cmdBuffer,
        m_currPass));

    // The batch is terminated unless more work will be appended in single task phase.
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase || m_scalableMode)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
    }

    // Name used for the debug dumps of this pass.
    std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
    if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
    {
        // Added extra symbol into log to avoid log's file overwrite on the next pass
        // For DYS Multipass mode next pass should run with "m_currPass = 0" again
        // See ExecutePictureLevel() function for all details
        currPassName.append("_0");
    }
    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
        &cmdBuffer,
        CODECHAL_NUM_MEDIA_STATES,
        currPassName.data())));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));

    if (IsLastPipe())
    {
        if (m_hucEnabled)
        {
            // We save the index of the 2nd level batch buffer in case there is a pass that needs the last SLBB
            m_lastVdencPictureState2ndLevelBBIndex = m_vdencPictureState2ndLevelBBIndex;
        }
        // Advance to the next recycled second level batch buffer.
        m_vdencPictureState2ndLevelBBIndex = (m_vdencPictureState2ndLevelBBIndex + 1) % CODECHAL_VP9_ENCODE_RECYCLED_BUFFER_NUM;
    }

    // First pipe, first pass: make the video context wait on the render context
    // sync object if one is in use, then clear the request.
    if (IsFirstPipe() &&
        m_waitForEnc &&
        IsFirstPass() &&
        !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
    {
        MOS_SYNC_PARAMS syncParams;
        syncParams = g_cInitSyncParams;
        syncParams.GpuContext = m_videoContext;
        syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
        m_waitForEnc = false;
    }

    // Submit now unless single task phase defers the submission to a later task.
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        bool renderFlags = m_videoContextUsesNullHw;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
        m_lastTaskInPhase = false;

        // Everything inside CODECHAL_DEBUG_TOOL, including the MMC user feature
        // key update at the end, is an argument of that macro.
        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                m_resVdencPakObjCmdStreamOutBuffer,
                CodechalDbgAttr::attrPakObjStreamout,
                currPassName.data(),
                m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE,
                0,
                CODECHAL_NUM_MEDIA_STATES));

            if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
                //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
                //m_debugInterface->DumpBuffer(
                //    (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
                //    CodechalDbgAttr::attrOutput,
                //    "SegMap_Out",
                //    CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
                //    0,
                //    CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
            }

            if (m_mmcState && !m_mmcUserFeatureUpdated) {
                m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
                m_mmcUserFeatureUpdated = true;
            });
    }

    // Reset parameters for next PAK execution
    if (IsLastPipe() && IsLastPass())
    {
        if (m_vp9PicParams->PicFlags.fields.super_frame && m_tsEnabled)
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructSuperFrame());
        }

        // NOTE(review): this branch is only reached on the last pipe, so the
        // (currentPipe == 0) test limits the signal to the case where the last
        // pipe is also pipe 0 - confirm that this is intended.
        if ((currentPipe == 0) &&
            m_signalEnc &&
            !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
        {
            // signal semaphore
            MOS_SYNC_PARAMS syncParams;
            syncParams = g_cInitSyncParams;
            syncParams.GpuContext = m_videoContext;
            syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
            m_semaphoreObjCount++;
        }

        // Remember the properties of this frame for the next one.
        m_prevFrameInfo.KeyFrame = !m_vp9PicParams->PicFlags.fields.frame_type;
        m_prevFrameInfo.IntraOnly = (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME) || m_vp9PicParams->PicFlags.fields.intra_only;
        m_prevFrameInfo.ShowFrame = m_vp9PicParams->PicFlags.fields.show_frame;
        m_prevFrameInfo.FrameWidth = m_oriFrameWidth;
        m_prevFrameInfo.FrameHeight = m_oriFrameHeight;
        // Toggle between the two MV temporal buffers.
        m_currMvTemporalBufferIndex ^= 0x01;
        m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx] = m_vp9PicParams->PicFlags.fields.frame_type;
        m_prevFrameSegEnabled = m_vp9PicParams->PicFlags.fields.segmentation_enabled;

        // Reset parameters for next PAK execution
        // (IsLastPass() repeats the enclosing condition)
        if ((!m_singleTaskPhaseSupported) && (IsLastPass()))
        {
            m_osInterface->pfnResetPerfBufferID(m_osInterface);
        }

        m_newPpsHeader = 0;
        m_newSeqHeader = 0;
        m_frameNum++;
        //Save the last frame's scalable mode flag to prevent switching buffers when doing next pass
        m_lastFrameScalableMode = m_scalableMode;
    }

    return eStatus;
}
2872
ExecuteSliceLevel()2873 MOS_STATUS CodechalVdencVp9StateG12::ExecuteSliceLevel()
2874 {
2875 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2876
2877 CODECHAL_ENCODE_FUNCTION_ENTER;
2878
2879 return ExecuteTileLevel();
2880 }
2881
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)2882 void CodechalVdencVp9StateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
2883 {
2884 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2885
2886 CODECHAL_ENCODE_FUNCTION_ENTER;
2887
2888 CodechalVdencVp9State::SetHcpPipeModeSelectParams(pipeModeSelectParams);
2889
2890 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParamsG12 = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(pipeModeSelectParams);
2891
2892 pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2893 pipeModeSelectParamsG12.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2894 pipeModeSelectParamsG12.bDynamicScalingEnabled = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
2895 if (m_scalableMode)
2896 {
2897 // Running in the multiple VDBOX mode
2898 if (IsFirstPipe())
2899 {
2900 pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2901 }
2902 else
2903 {
2904 if (IsLastPipe())
2905 {
2906 pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2907 }
2908 else
2909 {
2910 pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2911 }
2912 }
2913
2914 pipeModeSelectParamsG12.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2915 }
2916
2917 return;
2918 }
2919
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)2920 void CodechalVdencVp9StateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
2921 {
2922 CodechalVdencVp9State::SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2923
2924 PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBBIndex];
2925 bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
2926
2927 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
2928 {
2929 // overwrite presProbabilityCounterBuffer and it's params for scalable mode
2930 indObjBaseAddrParams.presProbabilityCounterBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
2931 indObjBaseAddrParams.dwProbabilityCounterOffset = m_tileStatsOffset.counterBuffer;
2932 indObjBaseAddrParams.dwProbabilityCounterSize = m_statsSize.counterBuffer;
2933 }
2934
2935 // Need to use presPakTileSizeStasBuffer instead of presTileRecordBuffer, so setting to null
2936 indObjBaseAddrParams.presTileRecordBuffer = nullptr;
2937 indObjBaseAddrParams.dwTileRecordSize = 0;
2938 indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer? &tileRecordBuffer->sResource : nullptr;
2939 indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer? ((m_statsSize.tileSizeRecord) * GetNumTilesInFrame()) : 0;
2940 indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer? m_tileStatsOffset.tileSizeRecord: 0;
2941 }
2942
VerifyCommandBufferSize()2943 MOS_STATUS CodechalVdencVp9StateG12::VerifyCommandBufferSize()
2944 {
2945 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2946
2947 CODECHAL_ENCODE_FUNCTION_ENTER;
2948
2949 if (UseLegacyCommandBuffer()) // legacy mode & resize CommandBuffer Size for every BRC pass
2950 {
2951 if (!m_singleTaskPhaseSupported)
2952 {
2953 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2954 }
2955 }
2956 else // virtual engine
2957 {
2958 uint32_t requestedSize =
2959 m_pictureStatesSize +
2960 m_picturePatchListSize +
2961 m_extraPictureStatesSize +
2962 (m_sliceStatesSize * m_numSlices);
2963 requestedSize += requestedSize*m_numPassesInOnePipe;
2964 if (m_hucEnabled && m_brcEnabled)
2965 {
2966 requestedSize += m_brcMaxNumPasses*(m_defaultHucCmdsSize + m_defaultHucPatchListSize);
2967 }
2968 // Running in the multiple VDBOX mode
2969 int currentPipe = GetCurrentPipe();
2970 int currentPass = GetCurrentPass();
2971 if (currentPipe < 0 || currentPipe >= m_numPipe)
2972 {
2973 return MOS_STATUS_INVALID_PARAMETER;
2974 }
2975 if (currentPass < 0 || currentPass >= m_brcMaxNumPasses)
2976 {
2977 return MOS_STATUS_INVALID_PARAMETER;
2978 }
2979
2980 if (IsFirstPipe() && m_osInterface->bUsesPatchList)
2981 {
2982 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2983 }
2984 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2985 PMOS_COMMAND_BUFFER cmdBuffer;
2986 if (m_osInterface->phasedSubmission)
2987 {
2988 m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
2989 return eStatus;
2990 }
2991 else
2992 {
2993 cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][(uint32_t)currentPipe][passIndex];
2994 }
2995
2996 if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
2997 m_sizeOfVEBatchBuffer < requestedSize)
2998 {
2999 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3000
3001 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3002 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3003 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3004 allocParamsForBufferLinear.Format = Format_Buffer;
3005 allocParamsForBufferLinear.dwBytes = requestedSize;
3006 allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
3007
3008 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
3009 {
3010 if (cmdBuffer->pCmdBase)
3011 {
3012 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
3013 }
3014 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
3015 }
3016
3017 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
3018 m_osInterface,
3019 &allocParamsForBufferLinear,
3020 &cmdBuffer->OsResource);
3021 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
3022
3023 m_sizeOfVEBatchBuffer = requestedSize;
3024 }
3025
3026 if (cmdBuffer->pCmdBase == 0)
3027 {
3028 MOS_LOCK_PARAMS lockParams;
3029 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
3030 lockParams.WriteOnly = true;
3031 cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
3032 cmdBuffer->iRemaining = m_sizeOfVEBatchBuffer;
3033 cmdBuffer->iOffset = 0;
3034
3035 if (cmdBuffer->pCmdBase == nullptr)
3036 {
3037 return MOS_STATUS_NULL_POINTER;
3038 }
3039 }
3040 }
3041
3042 return eStatus;
3043 }
3044
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)3045 MOS_STATUS CodechalVdencVp9StateG12::GetCommandBuffer(
3046 PMOS_COMMAND_BUFFER cmdBuffer)
3047 {
3048 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3049
3050 CODECHAL_ENCODE_FUNCTION_ENTER;
3051
3052 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3053
3054 if (UseLegacyCommandBuffer()) // legacy mode
3055 {
3056 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
3057 }
3058 else // virtual engine
3059 {
3060 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
3061 if (m_osInterface->phasedSubmission)
3062 {
3063 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, GetCurrentPipe() + 1));
3064
3065 CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
3066 if (IsLastPipe())
3067 {
3068 cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
3069 }
3070 }
3071 else
3072 {
3073 int currentPipe = GetCurrentPipe();
3074 int currentPass = GetCurrentPass();
3075 if (currentPipe < 0 || currentPipe >= m_numPipe)
3076 {
3077 return MOS_STATUS_INVALID_PARAMETER;
3078 }
3079 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
3080 *cmdBuffer = m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex];
3081 }
3082 }
3083
3084 return eStatus;
3085 }
3086
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)3087 MOS_STATUS CodechalVdencVp9StateG12::ReturnCommandBuffer(
3088 PMOS_COMMAND_BUFFER cmdBuffer)
3089 {
3090 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3091
3092 CODECHAL_ENCODE_FUNCTION_ENTER;
3093
3094 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3095
3096 if (UseLegacyCommandBuffer()) // legacy mode
3097 {
3098 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
3099 }
3100 else // virtual engine
3101 {
3102 if (m_osInterface->phasedSubmission)
3103 {
3104 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, GetCurrentPipe() + 1);
3105 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
3106 }
3107 else
3108 {
3109
3110 int currentPipe = GetCurrentPipe();
3111 int currentPass = GetCurrentPass();
3112 if (currentPipe < 0 || currentPipe >= m_numPipe)
3113 {
3114 return MOS_STATUS_INVALID_PARAMETER;
3115 }
3116
3117 if (eStatus == MOS_STATUS_SUCCESS)
3118 {
3119 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
3120 m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex] = *cmdBuffer;
3121 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
3122 }
3123 }
3124 }
3125
3126 return eStatus;
3127 }
3128
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)3129 MOS_STATUS CodechalVdencVp9StateG12::SubmitCommandBuffer(
3130 PMOS_COMMAND_BUFFER cmdBuffer,
3131 bool bNullRendering)
3132 {
3133 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3134
3135 CODECHAL_ENCODE_FUNCTION_ENTER;
3136
3137 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3138
3139 if (UseLegacyCommandBuffer()) // legacy mode
3140 {
3141 if (!IsRenderContext()) // Set VE Hints for video contexts only
3142 {
3143 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
3144 }
3145 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
3146 }
3147 else // virtual engine
3148 {
3149 if (!IsLastPipe())
3150 {
3151 return eStatus;
3152 }
3153
3154 if (m_osInterface->phasedSubmission)
3155 {
3156 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
3157 }
3158 else
3159 {
3160 int currentPass = GetCurrentPass();
3161 for (auto i = 0; i < m_numPipe; i++)
3162 {
3163 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
3164 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex];
3165
3166 if (cmdBuffer->pCmdBase)
3167 {
3168 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
3169 }
3170
3171 cmdBuffer->pCmdBase = 0;
3172 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
3173 }
3174
3175 if (eStatus == MOS_STATUS_SUCCESS)
3176 {
3177 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
3178 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
3179 }
3180 }
3181 }
3182
3183 return eStatus;
3184 }
3185
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)3186 MOS_STATUS CodechalVdencVp9StateG12::SendPrologWithFrameTracking(
3187 PMOS_COMMAND_BUFFER cmdBuffer,
3188 bool frameTrackingRequested,
3189 MHW_MI_MMIOREGISTERS *mmioRegister)
3190 {
3191 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3192
3193 CODECHAL_ENCODE_FUNCTION_ENTER;
3194
3195 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3196
3197 MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
3198
3199 if (IsRenderContext()) //Render context only
3200 {
3201 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
3202 return eStatus;
3203 }
3204 else // Legacy mode or virtual engine
3205 {
3206 #ifdef _MMC_SUPPORTED
3207 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
3208 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
3209 #endif
3210
3211 if (!IsLastPipe())
3212 {
3213 return eStatus;
3214 }
3215 PMOS_COMMAND_BUFFER commandBufferInUse;
3216 if (m_realCmdBuffer.pCmdBase)
3217 {
3218 commandBufferInUse = &m_realCmdBuffer; //virtual engine mode
3219 }
3220 else
3221 {
3222 if (cmdBuffer && cmdBuffer->pCmdBase)
3223 {
3224 commandBufferInUse = cmdBuffer; //legacy mode
3225 }
3226 else
3227 {
3228 eStatus = MOS_STATUS_INVALID_PARAMETER;
3229 return eStatus;
3230 }
3231 }
3232
3233 commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
3234 commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
3235 commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
3236 commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
3237 commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
3238
3239 if (frameTrackingRequested && m_frameTrackingEnabled)
3240 {
3241 commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
3242 commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
3243 &m_encodeStatusBuf.resStatusBuffer;
3244 commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
3245 // Set media frame tracking address offset(the offset from the encoder status buffer page)
3246 commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
3247 }
3248
3249 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
3250 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
3251 genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
3252 genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
3253 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
3254 genericPrologParams.dwStoreDataValue = m_storeData - 1;
3255 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
3256 }
3257
3258 return eStatus;
3259 }
3260
SetSemaphoreMem(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)3261 MOS_STATUS CodechalVdencVp9StateG12::SetSemaphoreMem(
3262 PMOS_RESOURCE semaphoreMem,
3263 PMOS_COMMAND_BUFFER cmdBuffer,
3264 uint32_t value)
3265 {
3266 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3267
3268 CODECHAL_ENCODE_FUNCTION_ENTER;
3269
3270 CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
3271 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3272 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3273 storeDataParams.pOsResource = semaphoreMem;
3274 storeDataParams.dwResourceOffset = 0;
3275 storeDataParams.dwValue = value;
3276
3277 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3278 cmdBuffer,
3279 &storeDataParams));
3280
3281 return eStatus;
3282 }
3283
SendHWWaitCommand(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)3284 MOS_STATUS CodechalVdencVp9StateG12::SendHWWaitCommand(
3285 PMOS_RESOURCE semaphoreMem,
3286 PMOS_COMMAND_BUFFER cmdBuffer,
3287 uint32_t value)
3288 {
3289 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3290
3291 CODECHAL_ENCODE_FUNCTION_ENTER;
3292 CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
3293
3294 MHW_MI_SEMAPHORE_WAIT_PARAMS semaphoreWaitParams;
3295 MOS_ZeroMemory(&semaphoreWaitParams, sizeof(semaphoreWaitParams));
3296 semaphoreWaitParams.presSemaphoreMem = semaphoreMem;
3297 semaphoreWaitParams.bPollingWaitMode = true;
3298 semaphoreWaitParams.dwSemaphoreData = value;
3299 semaphoreWaitParams.CompareOperation = MHW_MI_SAD_EQUAL_SDD;
3300 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(cmdBuffer, &semaphoreWaitParams));
3301
3302 return eStatus;
3303 }
3304
SetDmemHuCPakInt()3305 MOS_STATUS CodechalVdencVp9StateG12::SetDmemHuCPakInt()
3306 {
3307 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3308
3309 CODECHAL_ENCODE_FUNCTION_ENTER;
3310
3311 uint8_t currPass = (uint8_t)GetCurrentPass();
3312
3313 MOS_LOCK_PARAMS lockFlags;
3314 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3315 lockFlags.WriteOnly = 1;
3316 // All bytes in below dmem for fields not used by VP9 to be set to 0xFF.
3317 HucPakIntDmem* dmem = (HucPakIntDmem*)m_osInterface->pfnLockResource(
3318 m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass], &lockFlags);
3319 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
3320
3321 MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
3322 // CODECHAL_VDENC_VP9_PAK_INT_DMEM_OFFSETS_SIZE size of offsets in the CODECHAL_VDENC_VP9_HUC_PAK_INT_DMEM struct.
3323 // Reset offsets to 0xFFFFFFFF as unavailable
3324 memset(dmem, 0xFF, m_pakIntDmemOffsetsSize);
3325
3326 dmem->totalSizeInCommandBuffer = GetNumTilesInFrame() * CODECHAL_CACHELINE_SIZE;
3327 dmem->offsetInCommandBuffer = 0xFFFF; // Not used for VP9, all bytes in dmem for fields not used are 0xFF
3328 dmem->picWidthInPixel = (uint16_t)m_frameWidth;
3329 dmem->picHeightInPixel = (uint16_t)m_frameHeight;
3330 dmem->totalNumberOfPaks = m_numPipe;
3331 dmem->codec = m_pakIntVp9CodecId;
3332 dmem->maxPass = m_brcMaxNumPasses; // Only VDEnc CQP and BRC
3333 dmem->currentPass = currPass + 1;
3334 dmem->lastTileBSStartInBytes = m_tileParams[GetNumTilesInFrame() - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
3335 dmem->picStateStartInBytes = 0xFFFF;
3336
3337 if (m_enableTileStitchByHW)
3338 {
3339 dmem->StitchEnable = true;
3340 dmem->StitchCommandOffset = 0;
3341 dmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
3342 }
3343
3344 // Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
3345
3346 dmem->tileSizeRecordOffset[0] = m_frameStatsOffset.tileSizeRecord;
3347 dmem->vdencStatOffset[0] = m_frameStatsOffset.vdencStats;
3348 dmem->vp9PakStatOffset[0] = m_frameStatsOffset.pakStats;
3349 dmem->vp9CounterBufferOffset[0] = m_frameStatsOffset.counterBuffer;
3350
3351 //Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
3352 for (auto i = 1; i <= m_numPipe; i++)
3353 {
3354 dmem->numTiles[i - 1] = (GetNumTilesInFrame()) / m_numPipe;
3355 dmem->tileSizeRecordOffset[i] = m_tileStatsOffset.tileSizeRecord + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.tileSizeRecord);
3356 dmem->vdencStatOffset[i] = m_tileStatsOffset.vdencStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.vdencStats);
3357 dmem->vp9PakStatOffset[i] = m_tileStatsOffset.pakStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.pakStats);
3358 dmem->vp9CounterBufferOffset[i] = m_tileStatsOffset.counterBuffer + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.counterBuffer);
3359 }
3360 m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass]);
3361
3362 return eStatus;
3363 }
3364
SetSequenceStructs()3365 MOS_STATUS CodechalVdencVp9StateG12::SetSequenceStructs()
3366 {
3367 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3368
3369 CODECHAL_ENCODE_FUNCTION_ENTER;
3370
3371 #if defined(LINUX) && !defined(WDDM_LINUX)
3372 MOS_SURFACE rawSurface;
3373 PCODEC_VP9_ENCODE_SEQUENCE_PARAMS seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)m_encodeParams.pSeqParams;
3374 rawSurface = *(m_encodeParams.psRawSurface);
3375
3376 if (rawSurface.OsResource.Format == Format_A8R8G8B8 ||
3377 rawSurface.OsResource.Format == Format_B10G10R10A2)
3378 {
3379 seqParams->SeqFlags.fields.DisplayFormatSwizzle = 1;
3380 }
3381 #endif
3382
3383 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetSequenceStructs());
3384
3385 // All pipe need to go through the picture-level and slice-level commands
3386 m_numPassesInOnePipe = m_numPasses;
3387 m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
3388
3389 return eStatus;
3390 }
3391
SetPictureStructs()3392 MOS_STATUS CodechalVdencVp9StateG12::SetPictureStructs()
3393 {
3394 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3395
3396 CODECHAL_ENCODE_FUNCTION_ENTER;
3397
3398 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetPictureStructs());
3399
3400 m_virtualEngineBBIndex = m_currOriginalPic.FrameIdx;
3401 m_picWidthInMinBlk =
3402 MOS_ALIGN_CEIL(m_oriFrameWidth, CODEC_VP9_MIN_BLOCK_WIDTH);
3403 m_picHeightInMinBlk =
3404 MOS_ALIGN_CEIL(m_oriFrameHeight, CODEC_VP9_MIN_BLOCK_WIDTH);
3405
3406 // When buffers start recycling , we need to know the index of last buffer for next frame.
3407 if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
3408 {
3409 if (!m_hucEnabled)
3410 {
3411 m_numPassesInOnePipe = (m_dysRefFrameFlags != DYS_REF_NONE);
3412 }
3413 if (m_vdencBrcEnabled)
3414 {
3415 //Reduce per pipe passes by 1, as m_numPassesInOnePipe == 1 becomes m_numPassesInOnePipe = 0 for Huc to run
3416 m_dysBrc = true;
3417 m_numPassesInOnePipe = (m_numPassesInOnePipe > 0 ) ? m_numPassesInOnePipe - 1 : m_numPassesInOnePipe;
3418 }
3419 else
3420 {
3421 m_dysCqp = true;
3422 }
3423 m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
3424 }
3425 // This is BRC DYS SinglePass case
3426 // Actually, repak is disabled
3427 if (m_vdencBrcEnabled && (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
3428 {
3429 m_dysBrc = true;
3430 m_numPassesInOnePipe = 1;
3431 m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
3432 }
3433 if (!m_vdencBrcEnabled && (m_dysRefFrameFlags != DYS_REF_NONE))
3434 {
3435 m_dysCqp = true;
3436 }
3437
3438 #ifdef _MMC_SUPPORTED
3439 //WA to clear CCS by VE resolve
3440 if (MEDIA_IS_WA(m_waTable, Wa_1408785368))
3441 {
3442 bool clearccswa = false;
3443 MOS_SURFACE surfaceDetails = {};
3444 m_osInterface->pfnGetResourceInfo(m_osInterface, &m_reconSurface.OsResource, &surfaceDetails);
3445
3446 // Restore CCS if the surface's width/height is not aligned with that of current frame due to resolution change
3447 if ((m_frameNum != 0) &&
3448 ((surfaceDetails.dwWidth != m_picWidthInMinBlk) ||
3449 (surfaceDetails.dwHeight != m_picHeightInMinBlk)))
3450 {
3451 clearccswa = true;
3452 }
3453
3454 if (clearccswa && m_mmcState && m_mmcState->IsMmcEnabled())
3455 {
3456 m_osInterface->pfnDecompResource(m_osInterface, &m_reconSurface.OsResource);
3457 m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
3458 }
3459 }
3460 #endif
3461
3462 return eStatus;
3463 }
3464
ExecutePictureLevel()3465 MOS_STATUS CodechalVdencVp9StateG12::ExecutePictureLevel()
3466 {
3467 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3468
3469 CODECHAL_ENCODE_FUNCTION_ENTER;
3470
3471 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
3472
3473 PerfTagSetting perfTag;
3474 perfTag.Value = 0;
3475 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
3476 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
3477 perfTag.PictureCodingType = m_pictureCodingType;
3478 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
3479
3480 if ((m_dysRefFrameFlags == DYS_REF_NONE) && m_pakOnlyModeEnabledForLastPass)
3481 {
3482 //This flag sets pak-only mode in slbb for RePak pass. In single-pass mode, this flag should be disabled.
3483 m_vdencPakonlyMultipassEnabled = ((m_numPasses > 0) && (IsLastPass())) ? true : false;
3484 }
3485
3486 // Scalable Mode header
3487 if (m_scalableMode)
3488 {
3489 MOS_COMMAND_BUFFER cmdBuffer;
3490 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3491
3492 MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
3493 MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
3494 forceWakeupParams.bMFXPowerWellControl = true;
3495 forceWakeupParams.bMFXPowerWellControlMask = true;
3496 forceWakeupParams.bHEVCPowerWellControl = true;
3497 forceWakeupParams.bHEVCPowerWellControlMask = true;
3498
3499 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
3500 &cmdBuffer,
3501 &forceWakeupParams));
3502
3503 bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
3504 // In scalable mode, command buffer header is sent on last pipe only
3505 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3506 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3507 }
3508
3509 // for VDENC dynamic scaling, here are the steps we need to process
3510 // 1. Use PAK to down scale the reference picture (PASS 0)
3511 // 2. Run VDENC to stream out PakObjCmd (PASS 0)
3512 // 3. Run VDENC (with PAK only multi pass enabled) to stream in PakObjCmd from previous pass (PASS 0)
3513 // 4. Repak (PASS 1) - it is only for CQP mode
3514 // 5. Extra note: Repak is disabled for BRC Dynamic scaling single pass mode
3515 if (m_dysRefFrameFlags != DYS_REF_NONE)
3516 {
3517 if (m_currPass == 0)
3518 {
3519 // Turn off scalability and Tiling for Dynamic scaling pass 0 for reference scaling
3520 uint8_t logTileRows = m_vp9PicParams->log2_tile_rows;
3521 uint8_t logTileColumns = m_vp9PicParams->log2_tile_columns;
3522 bool scalableMode = m_scalableMode;
3523 uint8_t numPipe = m_numPipe;
3524 m_vp9PicParams->log2_tile_rows = 0;
3525 m_vp9PicParams->log2_tile_columns = 0;
3526 m_scalableMode = false;
3527 m_numPipe = 1;
3528 // Execute Reference scaling pass
3529 CODECHAL_ENCODE_CHK_STATUS_RETURN(DysRefFrames());
3530
3531 // Restore scalability and Tiling status for subsequent passes
3532 m_vp9PicParams->log2_tile_rows = logTileRows;
3533 m_vp9PicParams->log2_tile_columns = logTileColumns;
3534 m_scalableMode = scalableMode;
3535 m_numPipe = numPipe;
3536
3537 if (m_dysVdencMultiPassEnabled)
3538 {
3539 m_vdencPakObjCmdStreamOutEnabled = true;
3540 m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
3541 // enable single task phase here since we need to combine the pakobj streamout and pakonly pass into one batch buffer
3542 m_singleTaskPhaseSupported = true;
3543 m_firstTaskInPhase = true;
3544
3545 if (Mos_ResourceIsNull(&m_resVdencDysPictureState2NdLevelBatchBuffer))
3546 {
3547 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3548
3549 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3550 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3551 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3552 allocParamsForBufferLinear.Format = Format_Buffer;
3553 allocParamsForBufferLinear.dwBytes = m_vdencPicStateSecondLevelBatchBufferSize;
3554 allocParamsForBufferLinear.pBufName = "VDEnc DYS Picture Second Level Batch Buffer";
3555
3556 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
3557 m_osInterface,
3558 &allocParamsForBufferLinear,
3559 &m_resVdencDysPictureState2NdLevelBatchBuffer);
3560
3561 if (eStatus != MOS_STATUS_SUCCESS)
3562 {
3563 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate VDEnc DYS Picture Second Level Batch Buffer.");
3564 return eStatus;
3565 }
3566 }
3567
3568 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
3569 }
3570 else
3571 {
3572 m_hucEnabled = m_dysHucEnabled; // recover huc state
3573 }
3574 }
3575 else if (m_currPass == 1 && m_dysVdencMultiPassEnabled)
3576 {
3577 m_hucEnabled = m_dysHucEnabled; // recover huc state
3578 m_vdencPakonlyMultipassEnabled = true;
3579 m_dysRefFrameFlags = DYS_REF_NONE;
3580 m_currPass = 0; // reset ucCurrPass = 0 to run the Huc
3581 m_lastTaskInPhase = false;
3582 }
3583 }
3584 else
3585 {
3586 if (!(IsLastPass()))
3587 {
3588 m_vdencPakObjCmdStreamOutEnabled = true;
3589 m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
3590 }
3591 else
3592 {
3593 m_vdencPakObjCmdStreamOutEnabled = false;
3594 }
3595 }
3596
3597 if (m_isTilingSupported)
3598 {
3599 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3600 uint8_t* tileStatsData = nullptr;
3601 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3602 lockFlagsWriteOnly.WriteOnly = 1;
3603 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBBIndex].sResource))
3604 {
3605 // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
3606 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3607 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3608 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3609 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3610 allocParamsForBufferLinear.Format = Format_Buffer;
3611 auto size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
3612 allocParamsForBufferLinear.dwBytes = size;
3613 allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
3614
3615 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
3616 m_osInterface,
3617 &allocParamsForBufferLinear,
3618 &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource));
3619 m_tileRecordBuffer[m_virtualEngineBBIndex].dwSize = size;
3620 auto tileRecordData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
3621
3622 MOS_ZeroMemory(tileRecordData, allocParamsForBufferLinear.dwBytes);
3623 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource);
3624 }
3625 }
3626
3627 if (m_isTilingSupported && m_scalableMode && m_hucEnabled && IsFirstPipe() && IsFirstPass())
3628 {
3629 // Max row is 4 by VP9 Spec
3630 uint32_t m_maxScalableModeRows = 4;
3631 uint32_t m_maxScalableModeTiles = m_numVdbox * m_maxScalableModeRows;
3632
3633 // Fill Pak integration kernel input tile stats structure
3634 MOS_ZeroMemory(&m_tileStatsOffset, sizeof(StatsInfo));
3635 // TileSizeRecord has to be 4k aligned
3636 m_tileStatsOffset.tileSizeRecord = 0;
3637 // VdencStats has to be 4k aligned
3638 m_tileStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_tileStatsOffset.tileSizeRecord + (m_maxScalableModeTiles * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
3639 // VP9PAKStats has to be 64 byte aligned
3640 m_tileStatsOffset.pakStats = MOS_ALIGN_CEIL((m_tileStatsOffset.vdencStats + (m_maxScalableModeTiles * m_statsSize.vdencStats)), CODECHAL_PAGE_SIZE);
3641 // VP9CounterBuffer has to be 4k aligned
3642 m_tileStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_tileStatsOffset.pakStats + (m_maxScalableModeTiles * m_statsSize.pakStats)), CODECHAL_PAGE_SIZE);
3643
3644 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3645 uint8_t* tileStatsData = nullptr;
3646 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3647 lockFlagsWriteOnly.WriteOnly = 1;
3648
3649 if (Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource))
3650 {
3651 // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
3652 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3653 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3654 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3655 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3656 allocParamsForBufferLinear.Format = Format_Buffer;
3657 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL((m_tileStatsOffset.counterBuffer + (m_maxScalableModeTiles * m_statsSize.counterBuffer)), CODECHAL_PAGE_SIZE);
3658 allocParamsForBufferLinear.pBufName = "GEN12 Tile Level Statistics Buffer";
3659
3660 m_tileStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
3661
3662 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
3663 m_osInterface,
3664 &allocParamsForBufferLinear,
3665 &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource));
3666 m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].dwSize = allocParamsForBufferLinear.dwBytes;
3667
3668 tileStatsData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
3669
3670 MOS_ZeroMemory(tileStatsData, allocParamsForBufferLinear.dwBytes);
3671 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource);
3672 }
3673 }
3674
3675 if (IsFirstPass())
3676 {
3677 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
3678 }
3679 int currPass = GetCurrentPass();
3680 if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
3681 {
3682 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencDysPictureState2NdLevelBatchBuffer));
3683 }
3684 else
3685 {
3686 if (IsFirstPipe())
3687 {
3688 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex]));
3689 }
3690
3691 if (!m_scalableMode)
3692 {
3693 MOS_COMMAND_BUFFER cmdBuffer;
3694 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3695 MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
3696 MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
3697 forceWakeupParams.bMFXPowerWellControl = true;
3698 forceWakeupParams.bMFXPowerWellControlMask = true;
3699 forceWakeupParams.bHEVCPowerWellControl = true;
3700 forceWakeupParams.bHEVCPowerWellControlMask = true;
3701 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(&cmdBuffer, &forceWakeupParams));
3702 ReturnCommandBuffer(&cmdBuffer);
3703 }
3704 }
3705
3706 if (m_dysRefFrameFlags != DYS_REF_NONE)
3707 {
3708 m_brcReset = 1;
3709 }
3710
3711 if (m_vdencBrcEnabled && IsFirstPipe())
3712 {
3713 // Invoke BRC init/reset FW
3714 if (m_brcInit || m_brcReset)
3715 {
3716 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
3717 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
3718 m_brcInit = m_brcReset = false;
3719 }
3720 // For multipass and singlepass+RePAK we call BRC update for all passes except last pass (RePAK)
3721 // For single pass w/o RePAK (1 total pass) we call BRC update on one and only pass
3722 if (!IsLastPass() || (m_currPass == 0 && m_numPasses == 0))
3723 {
3724 bool origFrameTrackingHeader = false;
3725 bool origSingleTaskPhase = m_singleTaskPhaseSupported;
3726 // If this is the case of Dynamic Scaling + BRC Pass 0' VDENC + Pak pass
3727 // Disable SingleTaskPhase before running 1st BRC update
3728 // To run HPU0 on the next pass i.e Pak only pass, we make Pass 1 as Pass 0 in which case the
3729 // BRC dmem buffer( resVdencBrcUpdateDmemBuffer[0] ) will get overridden if we do not submit BRC command now.
3730 if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
3731 {
3732 //Reset Frame Tracking Header for this submission
3733 MOS_COMMAND_BUFFER cmdBuffer;
3734 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3735 origFrameTrackingHeader = cmdBuffer.Attributes.bEnableMediaFrameTracking;
3736 cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
3737 ReturnCommandBuffer(&cmdBuffer);
3738 m_singleTaskPhaseSupported = false;
3739 }
3740
3741 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
3742 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
3743 //Restore Original Frame Tracking Header
3744 if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
3745 {
3746 MOS_COMMAND_BUFFER cmdBuffer;
3747 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3748 cmdBuffer.Attributes.bEnableMediaFrameTracking = origFrameTrackingHeader;
3749 ReturnCommandBuffer(&cmdBuffer);
3750 }
3751 //Restore the original state of SingleTaskPhaseSupported flag
3752 m_singleTaskPhaseSupported = origSingleTaskPhase;
3753 }
3754 }
3755
3756 // run HuC_VP9Prob first pass (it runs in parallel with ENC)
3757 if (m_hucEnabled)
3758 {
3759 if (IsFirstPipe() && (IsFirstPass() || IsLastPass() || m_vdencBrcEnabled)) // Before the first PAK pass and for RePak pass
3760 {
3761 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
3762 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9Prob());
3763 // restore perf tag to PAK
3764 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
3765 }
3766 }
3767 else
3768 {
3769 CODECHAL_ENCODE_CHK_STATUS_RETURN(RefreshFrameInternalBuffers());
3770 }
3771
3772 // set HCP_SURFACE_STATE values
3773 MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
3774 for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3775 {
3776 MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
3777 surfaceParams[i].Mode = m_mode;
3778 surfaceParams[i].ucSurfaceStateId = i;
3779 surfaceParams[i].ChromaType = m_outputChromaFormat;
3780 surfaceParams[i].bSrc8Pak10Mode = (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth) && (!m_vp9SeqParams->SeqFlags.fields.SourceBitDepth);
3781
3782 switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
3783 {
3784 case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
3785 {
3786 surfaceParams[i].ucBitDepthChromaMinus8 = 2;
3787 surfaceParams[i].ucBitDepthLumaMinus8 = 2;
3788 break;
3789 }
3790 default:
3791 {
3792 surfaceParams[i].ucBitDepthChromaMinus8 = 0;
3793 surfaceParams[i].ucBitDepthLumaMinus8 = 0;
3794 break;
3795 }
3796 }
3797 }
3798
3799 // For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
3800 PMOS_SURFACE refSurface[3], refSurfaceNonScaled[3], dsRefSurface4x[3], dsRefSurface8x[3];
3801 for (auto i = 0; i < 3; i++)
3802 {
3803 refSurface[i] = refSurfaceNonScaled[i] = dsRefSurface4x[i] = dsRefSurface8x[i] = nullptr;
3804 }
3805 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpSrcSurfaceParams(surfaceParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
3806
3807 MOS_COMMAND_BUFFER cmdBuffer;
3808 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3809
3810 // Non scalable mode header
3811 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
3812 {
3813 // Send command buffer header at the beginning (OS dependent)
3814 // frame tracking tag is only added in the last command buffer header
3815 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
3816 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3817 }
3818
3819 // Place hw semaphore on all other pipe to wait for first pipe HUC to finish.
3820 int currPipe = GetCurrentPipe();
3821 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
3822 {
3823 if (!IsFirstPipe())
3824 {
3825 if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[currPipe].sResource))
3826 {
3827 // On second pipe, wait here for huc to finish on first pipe
3828 SendHWWaitCommand(&m_hucDoneSemaphoreMem[currPipe].sResource, &cmdBuffer, (currPass + 1));
3829 SetSemaphoreMem(&m_hucDoneSemaphoreMem[currPipe].sResource, &cmdBuffer, 0);
3830 }
3831 }
3832 }
3833
3834 // Repak conditional batch buffer end based on repak flag written by Huc to HUC_STATUS regster
3835 if (m_hucEnabled && (m_numPasses > 0) && IsLastPass())
3836 {
3837 // Insert conditional batch buffer end
3838 // Bit 30 has been added as a success condition, therefore this needs to be masked to only check 31 for RePAK
3839 // or else if HuC decides not to do RePAK for conditional RePAK yet terminates successfully RePAK will still happen.
3840 // Success = bit 30 set to 1, Do RePAK = bit 31 set to 1, value is always 0; if 0 < memory, continue
3841 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
3842 MOS_ZeroMemory(
3843 &miConditionalBatchBufferEndParams,
3844 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
3845
3846 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
3847 &m_resHucPakMmioBuffer;
3848 // Make the DisableCompareMask 0, so that the HW will do AND operation on DW0 with Mask DW1, refer to HuCVp9Prob() for the settings
3849 // and compare the result against the Semaphore data which in our case dwValue = 0.
3850 // If result > dwValue then continue execution otherwise terminate the batch buffer
3851 miConditionalBatchBufferEndParams.bDisableCompareMask = false;
3852
3853 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
3854 &cmdBuffer,
3855 &miConditionalBatchBufferEndParams));
3856 }
3857
3858 if (IsFirstPipe())
3859 {
3860 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
3861 }
3862
3863 // Send VDENC_CONTROL_STATE Pipe Initialization
3864 MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS vdencControlStateParams;
3865 {
3866 MOS_ZeroMemory(&vdencControlStateParams, sizeof(MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS));
3867 vdencControlStateParams.bVdencInitialization = true;
3868 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3869 static_cast<MhwVdboxVdencInterfaceG12X *>(m_vdencInterface)->AddVdencControlStateCmd(&cmdBuffer, &vdencControlStateParams));
3870 }
3871
3872 //Send VD_CONTROL_STATE Pipe Initialization
3873 MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
3874 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3875 vdCtrlParam.initialization = true;
3876 MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
3877 CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
3878
3879 // set HCP_PIPE_MODE_SELECT values
3880 PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
3881 pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
3882 CODECHAL_ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
3883
3884 auto release_func = [&]()
3885 {
3886 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3887 pipeModeSelectParams = nullptr;
3888 };
3889
3890 SetHcpPipeModeSelectParams(*pipeModeSelectParams);
3891 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), release_func);
3892
3893 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, false), release_func);
3894
3895 // Decoded picture
3896 #ifdef _MMC_SUPPORTED
3897 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, release_func);
3898 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), release_func);
3899 #endif
3900 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), release_func);
3901
3902 // Source input
3903 #ifdef _MMC_SUPPORTED
3904 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, release_func);
3905 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), release_func);
3906 #endif
3907 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), release_func);
3908
3909 if (MEDIA_IS_WA(m_waTable, Wa_Vp9UnalignedHeight))
3910 {
3911 uint32_t real_height = m_oriFrameHeight;
3912 uint32_t aligned_height = MOS_ALIGN_CEIL(real_height, CODEC_VP9_MIN_BLOCK_HEIGHT);
3913
3914 fill_pad_with_value(m_rawSurfaceToPak, real_height, aligned_height);
3915 }
3916
3917 if (m_pictureCodingType != I_TYPE)
3918 {
3919 #ifdef _MMC_SUPPORTED
3920 //Get each reference surface state and be recorded by skipMask if current surface state is mmc disabled
3921 //In VP9 mode, Bit 8is (here is bit0 in skipMask ) for Previous Reference;
3922 //Bit 9is (here is bit1 in skipMask ) for Golden Reference and Bit 10is (here is bit2 in skipMask ) for Alterante Reference;
3923 //Bits11-15are unused and should be programmed to 0 (skipped)
3924 uint8_t skipMask = 0xf8;
3925 for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3926 {
3927 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, release_func);
3928 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[i]), release_func);
3929 if (surfaceParams[i].mmcState == MOS_MEMCOMP_DISABLED)
3930 {
3931 skipMask |= (1 << (i - 2));
3932 }
3933 }
3934 CODECHAL_ENCODE_NORMALMESSAGE("MMC skip mask is %d\n", skipMask);
3935 for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3936 {
3937 //Set each ref surface state as MOS_MEMCOMP_MC to satisfy MmcEnable in AddHcpSurfaceCmd
3938 //Because each ref surface state should be programmed as the same
3939 //The actual mmc state is recorded by skipMask and set each ref surface too
3940 surfaceParams[i].mmcState = MOS_MEMCOMP_MC;
3941 surfaceParams[i].mmcSkipMask = skipMask;
3942 }
3943 #endif
3944 for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3945 {
3946 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[i]), release_func);
3947 }
3948 }
3949
3950 // set HCP_PIPE_BUF_ADDR_STATE values
3951 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
3952 pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
3953
3954 auto delete_func = [&]()
3955 {
3956 if (pipeModeSelectParams)
3957 {
3958 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3959 pipeModeSelectParams = nullptr;
3960 }
3961 if (pipeBufAddrParams)
3962 {
3963 MOS_Delete(pipeBufAddrParams);
3964 pipeBufAddrParams = nullptr;
3965 }
3966 };
3967
3968 if (pipeBufAddrParams)
3969 {
3970 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SetHcpPipeBufAddrParams(*pipeBufAddrParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x), delete_func);
3971 #ifdef _MMC_SUPPORTED
3972 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
3973 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
3974 #endif
3975 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
3976 }
3977
3978 // set HCP_IND_OBJ_BASE_ADDR_STATE values
3979 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
3980 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
3981 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams), delete_func);
3982
3983 // Send VD_CONTROL_STATE Pipe Initialization
3984 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3985 vdCtrlParam.vdencEnabled = true;
3986 vdCtrlParam.vdencInitialization = true;
3987 miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
3988 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam), delete_func);
3989
3990 // Change ref surfaces to scaled for VDENC for DYS
3991 if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
3992 {
3993 surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].psSurface = refSurface[0];
3994 surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].psSurface = refSurface[1];
3995 surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].psSurface = refSurface[2];
3996 }
3997
3998 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), delete_func);
3999 if (pipeModeSelectParams)
4000 {
4001 MOS_Delete(pipeModeSelectParams);
4002 pipeModeSelectParams = nullptr;
4003 }
4004
4005 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), delete_func);
4006 if (m_pictureCodingType == I_TYPE)
4007 {
4008 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), delete_func);
4009 }
4010 else
4011 {
4012 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]), delete_func);
4013 if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4014 {
4015 if (m_refFrameFlags & 0x02)
4016 {
4017 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]), delete_func);
4018 }
4019 if (m_refFrameFlags & 0x04)
4020 {
4021 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]), delete_func);
4022 }
4023 }
4024 }
4025
4026 MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2]; // 8x and 4x DS surfaces
4027 SetHcpDsSurfaceParams(&dsSurfaceParams[0]);
4028 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2), delete_func);
4029
4030 if (pipeBufAddrParams)
4031 {
4032 pipeBufAddrParams->presVdencTileRowStoreBuffer = &m_vdencTileRowStoreBuffer;
4033 pipeBufAddrParams->presVdencCumulativeCuCountStreamoutSurface = &m_vdencCumulativeCuCountStreamoutSurface;
4034 pipeBufAddrParams->bDynamicScalingEnable = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
4035 pipeBufAddrParams->pRawSurfParam = &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID];
4036 pipeBufAddrParams->pDecodedReconParam = &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID];
4037 pipeBufAddrParams->isIFrame = (m_vp9PicParams->PicFlags.fields.frame_type == 0);
4038
4039 #ifdef _MMC_SUPPORTED
4040 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
4041 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
4042 #endif
4043 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
4044 MOS_Delete(pipeBufAddrParams);
4045 pipeBufAddrParams = nullptr;
4046 }
4047
4048 MHW_BATCH_BUFFER secondLevelBatchBuffer;
4049 MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
4050 secondLevelBatchBuffer.dwOffset = 0;
4051 secondLevelBatchBuffer.bSecondLevel = true;
4052 if (m_hucEnabled)
4053 {
4054 secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
4055 }
4056 else
4057 {
4058 if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
4059 {
4060 secondLevelBatchBuffer.OsResource = m_resVdencDysPictureState2NdLevelBatchBuffer;
4061 }
4062 else
4063 {
4064 secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
4065 }
4066 }
4067
4068 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
4069 &cmdBuffer,
4070 &secondLevelBatchBuffer));
4071
4072 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
4073
4074 return eStatus;
4075 }
4076
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams,PMOS_SURFACE * refSurface,PMOS_SURFACE * refSurfaceNonScaled,PMOS_SURFACE * dsRefSurface4x,PMOS_SURFACE * dsRefSurface8x)4077 MOS_STATUS CodechalVdencVp9StateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams,
4078 PMOS_SURFACE* refSurface,
4079 PMOS_SURFACE* refSurfaceNonScaled,
4080 PMOS_SURFACE* dsRefSurface4x,
4081 PMOS_SURFACE* dsRefSurface8x)
4082 {
4083 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4084
4085 CODECHAL_ENCODE_FUNCTION_ENTER;
4086
4087 pipeBufAddrParams = {};
4088 pipeBufAddrParams.Mode = m_mode;
4089 pipeBufAddrParams.psPreDeblockSurface = &m_reconSurface;
4090 pipeBufAddrParams.psPostDeblockSurface = &m_reconSurface;
4091 pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
4092
4093 pipeBufAddrParams.presMfdDeblockingFilterRowStoreScratchBuffer =
4094 &m_resDeblockingFilterLineBuffer;
4095
4096 pipeBufAddrParams.presDeblockingFilterTileRowStoreScratchBuffer =
4097 &m_resDeblockingFilterTileLineBuffer;
4098
4099 pipeBufAddrParams.presDeblockingFilterColumnRowStoreScratchBuffer =
4100 &m_resDeblockingFilterTileColumnBuffer;
4101
4102 pipeBufAddrParams.presMetadataLineBuffer = &m_resMetadataLineBuffer;
4103 pipeBufAddrParams.presMetadataTileLineBuffer = &m_resMetadataTileLineBuffer;
4104 pipeBufAddrParams.presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
4105 pipeBufAddrParams.presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
4106 pipeBufAddrParams.bDynamicScalingEnable = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
4107
4108 if (m_mmcState && m_mmcState->IsMmcEnabled() && m_reconSurface.bCompressible)
4109 {
4110 pipeBufAddrParams.PreDeblockSurfMmcState = MOS_MEMCOMP_HORIZONTAL;
4111 pipeBufAddrParams.PostDeblockSurfMmcState = pipeBufAddrParams.PreDeblockSurfMmcState;
4112 }
4113 else
4114 {
4115 pipeBufAddrParams.PreDeblockSurfMmcState = MOS_MEMCOMP_DISABLED;
4116 }
4117
4118 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
4119 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(&pipeBufAddrParams));
4120
4121 // Huc first pass doesn't write probabilities to output prob region but only updates to the input region. HuC run before repak writes to the ouput region.
4122 uint8_t frameCtxIdx = 0;
4123 if (m_hucEnabled && IsLastPass())
4124 {
4125 pipeBufAddrParams.presVp9ProbBuffer = &m_resHucProbOutputBuffer;
4126 }
4127 else
4128 {
4129 frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
4130 CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
4131 pipeBufAddrParams.presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
4132 }
4133
4134 pipeBufAddrParams.presVp9SegmentIdBuffer = &m_resSegmentIdBuffer;
4135 pipeBufAddrParams.presHvdTileRowStoreBuffer = &m_resHvcTileRowstoreBuffer;
4136 pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4137 pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4138 pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = &m_resVdencIntraRowStoreScratchBuffer;
4139 pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
4140
4141 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4142 {
4143 pipeBufAddrParams.presVdencStreamOutBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
4144 pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;
4145 }
4146 else
4147 {
4148 pipeBufAddrParams.presVdencStreamOutBuffer = &m_resVdencBrcStatsBuffer;
4149 pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
4150 }
4151
4152 pipeBufAddrParams.presStreamOutBuffer = nullptr;
4153
4154 if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4155 {
4156 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex];
4157 bool useTileStatisticsBuffer = tileStatisticsBuffer && !Mos_ResourceIsNull(&tileStatisticsBuffer->sResource);
4158 // the new framestats streamout will now be the tile level stats buffer because each pak is spewing out tile level stats
4159 pipeBufAddrParams.presFrameStatStreamOutBuffer = useTileStatisticsBuffer ? &tileStatisticsBuffer->sResource : nullptr;
4160 pipeBufAddrParams.dwFrameStatStreamOutOffset = useTileStatisticsBuffer ? m_tileStatsOffset.pakStats : 0;
4161 //Main Frame Stats are integrated by PAK integration kernel
4162 }
4163 else
4164 {
4165 pipeBufAddrParams.presFrameStatStreamOutBuffer = &m_resFrameStatStreamOutBuffer;
4166 pipeBufAddrParams.dwFrameStatStreamOutOffset = 0;
4167 }
4168
4169 pipeBufAddrParams.presSseSrcPixelRowStoreBuffer = &m_resSseSrcPixelRowStoreBuffer;
4170 pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4171 pipeBufAddrParams.presSegmentMapStreamOut = &m_resVdencSegmentMapStreamOut;
4172 pipeBufAddrParams.presPakCuLevelStreamoutBuffer =
4173 Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;
4174 if (m_dysRefFrameFlags != DYS_REF_NONE)
4175 {
4176 pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer =
4177 (m_vdencPakObjCmdStreamOutEnabled) ? m_resVdencPakObjCmdStreamOutBuffer : nullptr;
4178 }
4179 else
4180 {
4181 pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4182 }
4183
4184 if (m_pictureCodingType != I_TYPE)
4185 {
4186 for (auto i = 0; i < 3; i++)
4187 {
4188 CODECHAL_ENCODE_CHK_NULL_RETURN(refSurface[i]);
4189 CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface4x[i]);
4190 CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface8x[i]);
4191 if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4192 {
4193 pipeBufAddrParams.presReferences[i] = &refSurfaceNonScaled[i]->OsResource;
4194 pipeBufAddrParams.presReferences[i+4] = &refSurfaceNonScaled[i]->OsResource;
4195 }
4196 else
4197 {
4198 pipeBufAddrParams.presReferences[i] = &refSurface[i]->OsResource;
4199 }
4200 pipeBufAddrParams.presVdencReferences[i] = &refSurface[i]->OsResource;
4201 pipeBufAddrParams.presVdenc4xDsSurface[i] = &dsRefSurface4x[i]->OsResource;
4202 pipeBufAddrParams.presVdenc8xDsSurface[i] = &dsRefSurface8x[i]->OsResource;
4203 }
4204 if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4205 {
4206 pipeBufAddrParams.psFwdRefSurface0 = refSurface[0];
4207 pipeBufAddrParams.psFwdRefSurface1 = refSurface[1];
4208 pipeBufAddrParams.psFwdRefSurface2 = refSurface[2];
4209 }
4210
4211 pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
4212 }
4213
4214 return eStatus;
4215 }
4216
GetNumTilesInFrame()4217 uint16_t CodechalVdencVp9StateG12::GetNumTilesInFrame()
4218 {
4219 return ((1 << m_vp9PicParams->log2_tile_rows) * (1 << m_vp9PicParams->log2_tile_columns));
4220 }
4221
AllocateResources()4222 MOS_STATUS CodechalVdencVp9StateG12::AllocateResources()
4223 {
4224 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4225
4226 CODECHAL_ENCODE_FUNCTION_ENTER;
4227
4228 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::AllocateResources());
4229
4230 // create the tile coding state parameters
4231 CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams =
4232 (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12) * m_maxTileNumber));
4233
4234 if (m_isTilingSupported)
4235 {
4236
4237 // VDENC tile row store buffer
4238 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4239 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4240 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4241 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4242 allocParamsForBufferLinear.Format = Format_Buffer;
4243 allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
4244 allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
4245
4246 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
4247 m_osInterface,
4248 &allocParamsForBufferLinear,
4249 &m_vdencTileRowStoreBuffer),
4250 "Failed to allocate VDENC Tile Row Store Buffer");
4251
4252 uint32_t maxPicWidthInSb = MOS_ROUNDUP_DIVIDE(m_maxPicWidth, CODEC_VP9_SUPER_BLOCK_WIDTH);
4253 uint32_t maxPicHeightInSb = MOS_ROUNDUP_DIVIDE(m_maxPicHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT);
4254
4255 //PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
4256 uint32_t size = maxPicWidthInSb * maxPicHeightInSb * 64 * CODECHAL_CACHELINE_SIZE; // One CU has 16-byte, and there are 64 CU in one SB. But, each tile needs to be aliged to the cache line
4257 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4258 allocParamsForBufferLinear.dwBytes = size;
4259 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4260 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4261 allocParamsForBufferLinear.Format = Format_Buffer;
4262 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
4263
4264 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4265 m_osInterface,
4266 &allocParamsForBufferLinear,
4267 &m_resPakcuLevelStreamoutData.sResource);
4268 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4269
4270 //PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
4271 // one LCU has one cache line. Use CU as LCU during creation
4272 allocParamsForBufferLinear.dwBytes = size;
4273 allocParamsForBufferLinear.pBufName = "PAK Slice Level Streamout Data";
4274
4275 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4276 m_osInterface,
4277 &allocParamsForBufferLinear,
4278 &m_resPakSliceLevelStreamutData.sResource);
4279 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4280
4281 //HCP scalability Sync buffer
4282 size = CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
4283 allocParamsForBufferLinear.dwBytes = size;
4284 allocParamsForBufferLinear.pBufName = "Hcp scalability Sync buffer ";
4285
4286 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4287 m_osInterface,
4288 &allocParamsForBufferLinear,
4289 &m_hcpScalabilitySyncBuffer.sResource);
4290 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4291 m_hcpScalabilitySyncBuffer.dwSize = size;
4292
4293 //HCP Tile Size Streamout Buffer. Use in HCP_IND_OBJ_CMD
4294 size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
4295 allocParamsForBufferLinear.dwBytes = size;
4296 allocParamsForBufferLinear.pBufName = "HCP Tile Record Buffer";
4297
4298 if (m_scalableMode && m_hucEnabled)
4299 {
4300 //Sizes of each buffer to be loaded into the region 0 as input and 1 loaded out as output.
4301
4302 MOS_ZeroMemory(&m_statsSize, sizeof(StatsInfo));
4303 m_statsSize.tileSizeRecord = m_hcpInterface->GetPakHWTileSizeRecordSize();
4304 m_statsSize.vdencStats = m_brcStatsBufSize; // VDEnc stats size
4305 m_statsSize.pakStats = m_brcPakStatsBufSize; // Frame stats size
4306 m_statsSize.counterBuffer = m_probabilityCounterBufferSize;
4307
4308 // HUC Pak Int DMEM buffer
4309 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
4310 allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
4311 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4312 {
4313 for (auto j = 0; j < m_brcMaxNumPasses; j++)
4314 {
4315 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4316 m_osInterface,
4317 &allocParamsForBufferLinear,
4318 &m_hucPakIntDmemBuffer[i][j]);
4319 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4320 }
4321 }
4322
4323 // HuC PAK Int Region 1 programming related stats
4324 MOS_ZeroMemory(&m_frameStatsOffset, sizeof(StatsInfo));
4325 m_frameStatsOffset.tileSizeRecord = 0;
4326 m_frameStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_frameStatsOffset.tileSizeRecord + (m_maxTileNumber * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
4327 m_frameStatsOffset.pakStats = MOS_ALIGN_CEIL((m_frameStatsOffset.vdencStats + m_statsSize.vdencStats), CODECHAL_PAGE_SIZE);
4328 m_frameStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_frameStatsOffset.pakStats + m_statsSize.pakStats), CODECHAL_PAGE_SIZE);
4329
4330 // HuC PAK Int DMEM region 1 buffer allocation
4331 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_frameStatsOffset.counterBuffer + m_statsSize.counterBuffer, CODECHAL_PAGE_SIZE);
4332 allocParamsForBufferLinear.pBufName = "PAK HUC Integrated Frame Stats Buffer";
4333 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4334 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4335 allocParamsForBufferLinear.Format = Format_Buffer;
4336
4337 m_frameStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
4338
4339 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4340 m_osInterface,
4341 &allocParamsForBufferLinear,
4342 &m_frameStatsPakIntegrationBuffer.sResource));
4343 m_frameStatsPakIntegrationBuffer.dwSize = allocParamsForBufferLinear.dwBytes;
4344
4345 MOS_LOCK_PARAMS lockFlags;
4346 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4347 lockFlags.WriteOnly = 1;
4348 uint8_t* data = nullptr;
4349
4350 data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource, &lockFlags);
4351 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4352 m_osInterface->pfnUnlockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource);
4353
4354 // HuC PAK Int region 7, 8
4355 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
4356 allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
4357
4358 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4359 m_osInterface,
4360 &allocParamsForBufferLinear,
4361 &m_hucPakIntDummyBuffer);
4362 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4363
4364 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4365 lockFlags.WriteOnly = 1;
4366
4367 data = (uint8_t*)m_osInterface->pfnLockResource(
4368 m_osInterface,
4369 &m_hucPakIntDummyBuffer,
4370 &lockFlags);
4371
4372 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4373 MOS_ZeroMemory(
4374 data,
4375 allocParamsForBufferLinear.dwBytes);
4376 m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDummyBuffer);
4377
4378 // Allocate region 9 of pak integration to be fed as input to HUC BRC region 7
4379 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4380 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4381 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4382 allocParamsForBufferLinear.Format = Format_Buffer;
4383 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
4384 allocParamsForBufferLinear.pBufName = "GEN12 PAK Integration FrameByteCount output";
4385 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4386 m_osInterface,
4387 &allocParamsForBufferLinear,
4388 &m_hucPakIntBrcDataBuffer));
4389
4390 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4391 lockFlags.WriteOnly = 1;
4392 data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_hucPakIntBrcDataBuffer, &lockFlags);
4393 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4394 m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntBrcDataBuffer);
4395
4396 // Allocate Semaphore memory for HUC to signal other pipe VDENC/PAK to continue
4397 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4398 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4399 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4400 allocParamsForBufferLinear.Format = Format_Buffer;
4401 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4402 allocParamsForBufferLinear.pBufName = "GEN12 HUC done Semaphore Memory";
4403
4404 for (auto i = 0; i < m_numPipe; i++)
4405 {
4406 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4407 m_osInterface,
4408 &allocParamsForBufferLinear,
4409 &m_hucDoneSemaphoreMem[i].sResource));
4410 m_hucDoneSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
4411 }
4412
4413 // Allocate Semaphore memory for VDEnc/PAK on all pipes to signal stitch command to stop waiting
4414 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4415 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4416 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4417 allocParamsForBufferLinear.Format = Format_Buffer;
4418 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4419 allocParamsForBufferLinear.pBufName = "GEN12 VDEnc PAK done Semaphore Memory";
4420
4421 for (auto i = 0; i < m_numPipe; i++)
4422 {
4423 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4424 m_osInterface,
4425 &allocParamsForBufferLinear,
4426 &m_stitchWaitSemaphoreMem[i].sResource));
4427 m_stitchWaitSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
4428 }
4429
4430 // Allocate semaphore memory for HUC HPU or BRC to wait on previous pass' PAK Integration command to finish
4431 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4432 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4433 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4434 allocParamsForBufferLinear.Format = Format_Buffer;
4435 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4436 allocParamsForBufferLinear.pBufName = "GEN12 VDEnc PAK Int done Semaphore Memory";
4437
4438 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4439 m_osInterface,
4440 &allocParamsForBufferLinear,
4441 &m_pakIntDoneSemaphoreMem.sResource));
4442 m_pakIntDoneSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
4443 }
4444 }
4445
4446 if (m_enableTileStitchByHW)
4447 {
4448 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4449 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4450 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4451 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4452 allocParamsForBufferLinear.Format = Format_Buffer;
4453
4454 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4455 {
4456 for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; j++)
4457 {
4458 // HuC stitching Data buffer
4459 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
4460 allocParamsForBufferLinear.pBufName = "VP9 HuC Stitch Data Buffer";
4461 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4462 m_osInterface->pfnAllocateResource(
4463 m_osInterface,
4464 &allocParamsForBufferLinear,
4465 &m_resHucStitchDataBuffer[i][j]));
4466 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4467 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4468 lockFlagsWriteOnly.WriteOnly = 1;
4469 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
4470 m_osInterface,
4471 &m_resHucStitchDataBuffer[i][j],
4472 &lockFlagsWriteOnly);
4473 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
4474 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
4475 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
4476 }
4477 }
4478 //Second level BB for huc stitching cmd
4479 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
4480 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
4481 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
4482 m_osInterface,
4483 &m_HucStitchCmdBatchBuffer,
4484 nullptr,
4485 m_hwInterface->m_HucStitchCmdBatchBufferSize));
4486 }
4487
4488 uint32_t aligned_width = MOS_ALIGN_CEIL(m_frameWidth, 64);
4489 uint32_t aligned_height = MOS_ALIGN_CEIL(m_frameHeight, 64);
4490 uint32_t num_lcu = (aligned_width * aligned_height) / (64 * 64);
4491
4492 MOS_ALLOC_GFXRES_PARAMS allocParamsForSurface;
4493 MOS_ZeroMemory(&allocParamsForSurface, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4494 allocParamsForSurface.Type = MOS_GFXRES_BUFFER;
4495 allocParamsForSurface.TileType = MOS_TILE_LINEAR;
4496 allocParamsForSurface.Format = Format_Buffer;
4497 allocParamsForSurface.dwBytes = num_lcu * 4;
4498 allocParamsForSurface.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
4499
4500 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
4501 m_osInterface,
4502 &allocParamsForSurface,
4503 &m_vdencCumulativeCuCountStreamoutSurface),
4504 "Failed to allocate VDEnc Cumulative CU Count Streamout Surface");
4505
4506 return eStatus;
4507 }
4508
FreeResources()4509 void CodechalVdencVp9StateG12::FreeResources()
4510 {
4511 CodechalVdencVp9State::FreeResources();
4512
4513 MOS_FreeMemory(m_tileParams);
4514 if (m_isTilingSupported)
4515 {
4516 if (!Mos_ResourceIsNull(&m_vdencTileRowStoreBuffer))
4517 {
4518 m_osInterface->pfnFreeResource(
4519 m_osInterface,
4520 &m_vdencTileRowStoreBuffer);
4521 }
4522
4523 if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
4524 {
4525 m_osInterface->pfnFreeResource(
4526 m_osInterface,
4527 &m_resPakcuLevelStreamoutData.sResource);
4528 }
4529
4530 if (!Mos_ResourceIsNull(&m_resPakSliceLevelStreamutData.sResource))
4531 {
4532 m_osInterface->pfnFreeResource(
4533 m_osInterface,
4534 &m_resPakSliceLevelStreamutData.sResource);
4535 }
4536
4537 // Release Hcp scalability Sync buffer
4538 if (!Mos_ResourceIsNull(&m_hcpScalabilitySyncBuffer.sResource))
4539 {
4540 m_osInterface->pfnFreeResource(
4541 m_osInterface,
4542 &m_hcpScalabilitySyncBuffer.sResource);
4543 }
4544
4545 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
4546 {
4547 if (!Mos_ResourceIsNull(&m_tileRecordBuffer[i].sResource))
4548 {
4549 m_osInterface->pfnFreeResource(
4550 m_osInterface,
4551 &m_tileRecordBuffer[i].sResource);
4552 }
4553 }
4554
4555 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileStatsPakIntegrationBuffer); i++)
4556 {
4557 if (!Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[i].sResource))
4558 {
4559 m_osInterface->pfnFreeResource(
4560 m_osInterface,
4561 &m_tileStatsPakIntegrationBuffer[i].sResource);
4562 }
4563 }
4564
4565 if (!Mos_ResourceIsNull(&m_frameStatsPakIntegrationBuffer.sResource))
4566 {
4567 m_osInterface->pfnFreeResource(
4568 m_osInterface,
4569 &m_frameStatsPakIntegrationBuffer.sResource);
4570 }
4571
4572 if (!Mos_ResourceIsNull(&m_hucPakIntBrcDataBuffer))
4573 {
4574 m_osInterface->pfnFreeResource(
4575 m_osInterface,
4576 &m_hucPakIntBrcDataBuffer);
4577 }
4578
4579 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4580 {
4581 for (auto j = 0; j < m_brcMaxNumPasses; j++)
4582 {
4583 if (!Mos_ResourceIsNull(&m_hucPakIntDmemBuffer[i][j]))
4584 {
4585 m_osInterface->pfnFreeResource(
4586 m_osInterface,
4587 &m_hucPakIntDmemBuffer[i][j]);
4588 }
4589 }
4590 }
4591
4592 if (!Mos_ResourceIsNull(&m_hucPakIntDummyBuffer))
4593 {
4594 m_osInterface->pfnFreeResource(
4595 m_osInterface,
4596 &m_hucPakIntDummyBuffer);
4597 }
4598
4599 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_stitchWaitSemaphoreMem); i++)
4600 {
4601 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource))
4602 {
4603 m_osInterface->pfnFreeResource(
4604 m_osInterface,
4605 &m_stitchWaitSemaphoreMem[i].sResource);
4606 }
4607 }
4608
4609 if (!Mos_ResourceIsNull(&m_pakIntDoneSemaphoreMem.sResource))
4610 {
4611 m_osInterface->pfnFreeResource(
4612 m_osInterface,
4613 &m_pakIntDoneSemaphoreMem.sResource);
4614 }
4615
4616 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_hucDoneSemaphoreMem); i++)
4617 {
4618 if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[i].sResource))
4619 {
4620 m_osInterface->pfnFreeResource(
4621 m_osInterface,
4622 &m_hucDoneSemaphoreMem[i].sResource);
4623 }
4624 }
4625
4626 for (auto i = 0; i < m_numUncompressedSurface; i++)
4627 {
4628 for (auto j = 0; j < CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE; j++)
4629 {
4630 for (auto k = 0; k < 3; k++)
4631 {
4632 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
4633
4634 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
4635 {
4636 if (cmdBuffer->pCmdBase)
4637 {
4638 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
4639 }
4640 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
4641 }
4642 }
4643 }
4644 }
4645 }
4646
4647 if (m_enableTileStitchByHW)
4648 {
4649 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4650 {
4651 for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; j++)
4652 {
4653 // HuC stitching Data buffer
4654 m_osInterface->pfnFreeResource(
4655 m_osInterface,
4656 &m_resHucStitchDataBuffer[i][j]);
4657 }
4658 }
4659 //Second level BB for huc stitching cmd
4660 Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
4661 }
4662
4663 if (!Mos_ResourceIsNull(&m_vdencCumulativeCuCountStreamoutSurface))
4664 {
4665 m_osInterface->pfnFreeResource(
4666 m_osInterface,
4667 &m_vdencCumulativeCuCountStreamoutSurface);
4668 }
4669
4670 return;
4671 }
4672
SetRowstoreCachingOffsets()4673 MOS_STATUS CodechalVdencVp9StateG12::SetRowstoreCachingOffsets()
4674 {
4675 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4676 // Add row store cache support for VDENC Gen12.
4677 if (m_hwInterface->GetHcpInterface()->IsRowStoreCachingSupported())
4678 {
4679 //add row store cache support.
4680 MHW_VDBOX_ROWSTORE_PARAMS rowstoreParams;
4681 rowstoreParams.Mode = m_mode;
4682 rowstoreParams.dwPicWidth = m_frameWidth;
4683 rowstoreParams.ucChromaFormat = ToHCPChromaFormat(m_chromaFormat);
4684 rowstoreParams.ucBitDepthMinus8 = m_bitDepth * 2; // 0(8bit) -> 0, 1(10bit)->2, 2(12bit)->4
4685 m_hwInterface->SetRowstoreCachingOffsets(&rowstoreParams);
4686 }
4687 return eStatus;
4688 }
4689
Initialize(CodechalSetting * settings)4690 MOS_STATUS CodechalVdencVp9StateG12::Initialize(CodechalSetting * settings)
4691 {
4692 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4693 uint32_t maxRows = 1;
4694
4695 CODECHAL_ENCODE_FUNCTION_ENTER;
4696
4697 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::Initialize(settings));
4698
4699 GetSystemPipeNumberCommon();
4700
4701 if (MOS_VE_SUPPORTED(m_osInterface))
4702 {
4703 m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
4704 CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
4705 //scalability initialize
4706 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
4707 }
4708
4709 m_adaptiveRepakSupported = true;
4710 //This flag enables pak-only mode for RePak pass
4711 m_pakOnlyModeEnabledForLastPass = true;
4712
4713 maxRows = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT;
4714 //Max num of rows = 4 by VP9 Spec
4715 maxRows = MOS_MIN(maxRows, 4);
4716
4717 //Max tile numbers = max of number tiles for single pipe or max muber of tiles for scalable pipes
4718 m_maxTileNumber = MOS_MAX((MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH), m_numVdbox) * maxRows;
4719
4720 m_numPipe = m_numVdbox;
4721
4722 m_scalableMode = (m_numPipe > 1);
4723
4724 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRowstoreCachingOffsets());
4725
4726 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
4727
4728 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4729 MOS_STATUS eStatusKey = MOS_UserFeature_ReadValue_ID(
4730 nullptr,
4731 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH,
4732 &userFeatureData,
4733 m_osInterface->pOsContext);
4734 m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
4735
4736 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4737 userFeatureData.i32Data = 1;
4738 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
4739 MOS_UserFeature_ReadValue_ID(
4740 nullptr,
4741 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_HUC_ENABLE_ID,
4742 &userFeatureData,
4743 m_osInterface->pOsContext);
4744 m_hucEnabled = (userFeatureData.i32Data) ? true : false;
4745
4746 //Enable single pass dynamic scaling by default
4747 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4748 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
4749 userFeatureData.i32Data = 1;
4750 MOS_UserFeature_ReadValue_ID(
4751 nullptr,
4752 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_SINGLE_PASS_DYS_ENABLE_ID,
4753 &userFeatureData,
4754 m_osInterface->pOsContext);
4755 m_dysVdencMultiPassEnabled = (userFeatureData.i32Data) ? false : true;
4756 m_singlePassDys = !m_dysVdencMultiPassEnabled;
4757
4758 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4759 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
4760 userFeatureData.i32Data = 1;
4761 MOS_UserFeature_ReadValue_ID(
4762 nullptr,
4763 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
4764 &userFeatureData,
4765 m_osInterface->pOsContext);
4766 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
4767 m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
4768 // For dynamic scaling, the SingleTaskPhaseSupported is set to true and it does not get restored
4769 // to the original value after encoding of the frame. So need to restore to the original state
4770 m_storeSingleTaskPhaseSupported = m_singleTaskPhaseSupported; //Save the SingleTaskPhase state here
4771
4772 // Multi-Pass BRC: currently disabled by default
4773 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4774 MOS_UserFeature_ReadValue_ID(
4775 nullptr,
4776 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_MULTIPASS_BRC_ENABLE_ID,
4777 &userFeatureData,
4778 m_osInterface->pOsContext);
4779 m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
4780
4781 m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
4782 m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
4783 m_brcHistoryBufferSize = m_brcHistoryBufSize;
4784 // HME enabled by default for VP9
4785 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4786 MOS_UserFeature_ReadValue_ID(
4787 NULL,
4788 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ME_ENABLE_ID,
4789 &userFeatureData,
4790 m_osInterface->pOsContext);
4791 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
4792
4793 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4794 MOS_UserFeature_ReadValue_ID(
4795 NULL,
4796 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_16xME_ENABLE_ID,
4797 &userFeatureData,
4798 m_osInterface->pOsContext);
4799 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
4800
4801 // disable superHME when HME is disabled
4802 if (m_hmeSupported == false)
4803 {
4804 m_16xMeSupported = false;
4805 }
4806
4807 // UHME disabled
4808 m_32xMeSupported = false;
4809
4810 // VP9 uses a different streamin kernel
4811 m_useNonLegacyStreamin = true;
4812
4813 // Initialize kernel State
4814 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStates());
4815
4816 // Get max binding table count
4817 m_maxBtCount = GetMaxBtCount(); // Need to add the correct BTcount when HME is enabled
4818
4819 #if (_DEBUG || _RELEASE_INTERNAL)
4820 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4821 MOS_UserFeature_ReadValue_ID(
4822 nullptr,
4823 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_BRC_DLL,
4824 &userFeatureData,
4825 m_osInterface->pOsContext);
4826
4827 if (userFeatureData.i32Data)
4828 {
4829 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4830 MOS_UserFeature_ReadValue_ID(
4831 nullptr,
4832 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_BRC_DLL_CUSTOMPATH,
4833 &userFeatureData,
4834 m_osInterface->pOsContext);
4835
4836 if (!userFeatureData.i32Data)
4837 {
4838 CODECHAL_ENCODE_CHK_STATUS_RETURN(MosUtilities::MosLoadLibrary(VP9SWBRCLIB, &m_swBrcMode)); // Load Dependency (use on RS1)
4839 }
4840 else
4841 {
4842 char path_buffer[MAXPATH];
4843 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4844 MOS_ZeroMemory(path_buffer, MAXPATH);
4845 userFeatureData.StringData.pStringData = path_buffer;
4846 MOS_UserFeature_ReadValue_ID(
4847 nullptr,
4848 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_BRC_DLL_PATH,
4849 &userFeatureData,
4850 m_osInterface->pOsContext);
4851 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnLoadLibrary(m_osInterface, path_buffer, &m_swBrcMode));
4852 }
4853 }
4854 #endif // (_DEBUG || _RELEASE_INTERNAL)
4855
4856 return eStatus;
4857 }
4858
/*----------------------------------------------------------------------------
| Name      : GetSegmentBlockIndexInFrame
| Purpose   : Returns the raster index of a 32x32 block within the frame, given the block's x,y position (in 32x32 units) inside the current tile and the tile's 64-aligned start position in the frame
|
| Returns   : uint32_t - index of the 32x32 block in the frame
\---------------------------------------------------------------------------*/
GetSegmentBlockIndexInFrame(uint32_t frameWidth,uint32_t curr32XInTile,uint32_t curr32YInTile,uint32_t currTileStartY64aligned,uint32_t currTileStartX64aligned)4865 uint32_t CodechalVdencVp9StateG12::GetSegmentBlockIndexInFrame(
4866 uint32_t frameWidth,
4867 uint32_t curr32XInTile,
4868 uint32_t curr32YInTile,
4869 uint32_t currTileStartY64aligned,
4870 uint32_t currTileStartX64aligned)
4871 {
4872 uint32_t frameWidthIn32 = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
4873 uint32_t curr32XInFrame = currTileStartX64aligned / 32 + curr32XInTile;
4874 uint32_t curr32YInFrame = currTileStartY64aligned / 32 + curr32YInTile;
4875 uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
4876 return curr32BlockInFrame;
4877 }
4878
/*----------------------------------------------------------------------------
| Name      : InitZigZagToRasterLUTPerTile
| Purpose   : Computes the frame raster index of every 32x32 block of a tile and appends these indices, in zig-zag order, to the frame-level map buffer
|
| Returns   : MOS_STATUS
\---------------------------------------------------------------------------*/
InitZigZagToRasterLUTPerTile(uint32_t tileHeight,uint32_t tileWidth,uint32_t currTileStartYInFrame,uint32_t currTileStartXInFrame)4885 MOS_STATUS CodechalVdencVp9StateG12::InitZigZagToRasterLUTPerTile(
4886 uint32_t tileHeight,
4887 uint32_t tileWidth,
4888 uint32_t currTileStartYInFrame,
4889 uint32_t currTileStartXInFrame)
4890 {
4891 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4892
4893 // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned))
4894 // We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
4895 // We keep this map around until sequence is finished, it's deleted at device destruction.
4896 if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
4897 {
4898 if (m_mapBuffer) // free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
4899 {
4900 MOS_FreeMemory(m_mapBuffer);
4901 }
4902 // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index.
4903 m_mapBuffer = (uint32_t*)MOS_AllocAndZeroMemory(
4904 (MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
4905 (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
4906 sizeof(int32_t)); //Framewidth and height are 64 aligned already
4907 }
4908 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mapBuffer);
4909
4910 uint32_t align64Width32 = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
4911 uint32_t align64Height32 = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
4912 uint32_t* mapBufferZigZagPerTile = (uint32_t*)MOS_AllocAndZeroMemory(align64Width32*align64Height32 * sizeof(uint32_t));
4913 CODECHAL_ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
4914
4915 m_segStreamInHeight = m_frameHeight;
4916 m_segStreamInWidth = m_frameWidth;
4917
4918 uint32_t dwCount32 = 0; //Number of 32 by 32 blocks that will be processed here
4919 for (uint32_t curr32YInTile = 0; curr32YInTile< align64Height32; curr32YInTile++)
4920 {
4921 for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
4922 {
4923 mapBufferZigZagPerTile[dwCount32++] = GetSegmentBlockIndexInFrame(
4924 m_frameWidth,
4925 curr32XInTile,
4926 curr32YInTile,
4927 currTileStartYInFrame,
4928 currTileStartXInFrame);
4929 }
4930 }
4931
4932 // mapBufferZigZagPerTile ---> m_mapBuffer
4933 // | a b c d ... ---> | a b W X c d Y Z ....
4934 // | W X Y Z ...
4935 uint32_t num32blocks = align64Width32 * align64Height32;
4936 uint32_t tileOffsetIndex = m_32BlocksRasterized;
4937 for (uint32_t i = 0, dwRasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
4938 {
4939 for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
4940 {
4941 m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
4942 m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
4943 }
4944 for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
4945 {
4946 m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
4947 m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
4948 }
4949 }
4950 if (mapBufferZigZagPerTile) // free per tile map buffer as it has been rasterized and copied into the mapbuffer
4951 {
4952 MOS_FreeMemory(mapBufferZigZagPerTile);
4953 }
4954
4955 // ^ Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
4956 uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
4957 if (width32 != align64Width32) // replicate last column
4958 {
4959 for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
4960 {
4961 m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 1];
4962 m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
4963 }
4964 }
4965
4966 uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
4967 if (height32 != align64Height32) // replicate last row
4968 {
4969 for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
4970 {
4971 m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 2];
4972 m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
4973 }
4974 }
4975 //Index offset to be added to the buffer for the next tile depending on how many blocks were rasterized already in this tile
4976 m_32BlocksRasterized += dwCount32;
4977
4978 return eStatus;
4979 }
4980
CalculateVdencPictureStateCommandSize()4981 MOS_STATUS CodechalVdencVp9StateG12::CalculateVdencPictureStateCommandSize()
4982 {
4983 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4984
4985 CODECHAL_ENCODE_FUNCTION_ENTER;
4986
4987 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
4988 uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
4989 stateCmdSizeParams.bHucDummyStream = true;
4990 m_hwInterface->GetHxxStateCommandSize(
4991 CODECHAL_ENCODE_MODE_VP9,
4992 &vdencPictureStatesSize,
4993 &vdencPicturePatchListSize,
4994 &stateCmdSizeParams);
4995
4996 m_defaultPictureStatesSize += vdencPictureStatesSize;
4997 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
4998
4999 m_hwInterface->GetVdencStateCommandsDataSize(
5000 CODECHAL_ENCODE_MODE_VP9,
5001 &vdencPictureStatesSize,
5002 &vdencPicturePatchListSize);
5003
5004 m_defaultPictureStatesSize += vdencPictureStatesSize;
5005 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
5006
5007 return eStatus;
5008 }
5009
CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)5010 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS CodechalVdencVp9StateG12::CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)
5011 {
5012 pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
5013
5014 return pipeBufAddrParams;
5015 }
5016
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)5017 MOS_STATUS CodechalVdencVp9StateG12::UpdateCmdBufAttribute(
5018 PMOS_COMMAND_BUFFER cmdBuffer,
5019 bool renderEngineInUse)
5020 {
5021 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5022
5023 // should not be there. Will remove it in the next change
5024 CODECHAL_ENCODE_FUNCTION_ENTER;
5025 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
5026 {
5027 PMOS_CMD_BUF_ATTRI_VE attriExt =
5028 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
5029
5030 memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
5031 attriExt->bUseVirtualEngineHint =
5032 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
5033 }
5034
5035 return eStatus;
5036 }
5037
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)5038 MOS_STATUS CodechalVdencVp9StateG12::AddMediaVfeCmd(
5039 PMOS_COMMAND_BUFFER cmdBuffer,
5040 SendKernelCmdsParams *params)
5041 {
5042 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
5043
5044 MHW_VFE_PARAMS_G12 vfeParams = {};
5045 vfeParams.pKernelState = params->pKernelState;
5046 vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
5047 vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads;
5048 vfeParams.bFusedEuDispatch = false; // legacy mode
5049
5050 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
5051
5052 return MOS_STATUS_SUCCESS;
5053 }
5054
HuCVp9PakInt(PMOS_COMMAND_BUFFER cmdBuffer)5055 MOS_STATUS CodechalVdencVp9StateG12::HuCVp9PakInt(
5056 PMOS_COMMAND_BUFFER cmdBuffer)
5057 {
5058 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5059
5060 CODECHAL_ENCODE_FUNCTION_ENTER;
5061
5062 if (!IsFirstPipe())
5063 {
5064 return eStatus;
5065 }
5066
5067 CODECHAL_DEBUG_TOOL(
5068 uint32_t hucRegionSize[16] = { 0 };
5069 const char* hucRegionName[16] = { "\0" };
5070
5071 hucRegionName[0] = "_MultiPakStreamout_input";
5072 hucRegionSize[0] = m_tileStatsPakIntegrationBufferSize;
5073 hucRegionName[1] = "_IntegratedStreamout_output";
5074 hucRegionSize[1] = m_frameStatsPakIntegrationBufferSize;
5075 hucRegionName[4] = "_HCPPICSTATEInputDummy";
5076 hucRegionSize[4] = sizeof(m_hucPakIntDummyBuffer);
5077 hucRegionName[5] = "_HCPPICSTATEInputDummy";
5078 hucRegionSize[5] = sizeof(m_hucPakIntDummyBuffer);
5079 hucRegionName[6] = "_HCPPICSTATEInputDummy";
5080 hucRegionSize[6] = sizeof(m_hucPakIntDummyBuffer);
5081 hucRegionName[7] = "_HCPPICSTATEInputDummy";
5082 hucRegionSize[7] = sizeof(m_hucPakIntDummyBuffer);
5083 hucRegionName[8] = "_HucStitchDataBuffer";
5084 hucRegionSize[8] = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
5085 hucRegionName[9] = "_BrcDataOutputBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
5086 hucRegionSize[9] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
5087 hucRegionName[15] = "_TileRecordBuffer";
5088 hucRegionSize[15] = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
5089 )
5090
5091 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5092 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5093 imemParams.dwKernelDescriptor = m_vdboxHucPakIntegrationKernelDescriptor;
5094 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(cmdBuffer, &imemParams));
5095
5096 // pipe mode select
5097 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5098 pipeModeSelectParams.Mode = m_mode;
5099 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
5100
5101 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakInt());
5102
5103 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5104 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5105 dmemParams.presHucDataSource = &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()];
5106 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
5107 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5108 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
5109
5110 if (m_enableTileStitchByHW)
5111 {
5112 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
5113 }
5114
5115 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5116 MOS_ZeroMemory(&virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
5117 virtualAddrParams.regionParams[0].presRegion = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
5118 virtualAddrParams.regionParams[0].dwOffset = 0;
5119 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource; // Region 1 - HuC Frame statistics output
5120 virtualAddrParams.regionParams[1].isWritable = true;
5121 virtualAddrParams.regionParams[4].presRegion = &m_hucPakIntDummyBuffer; // Region 4 - Not used for VP9
5122 virtualAddrParams.regionParams[5].presRegion = &m_hucPakIntDummyBuffer; // Region 5 - Not used for VP9
5123 virtualAddrParams.regionParams[5].isWritable = true;
5124 virtualAddrParams.regionParams[6].presRegion = &m_hucPakIntDummyBuffer; // Region 6 - Not used for VP9
5125 virtualAddrParams.regionParams[6].isWritable = true;
5126 virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntDummyBuffer; // Region 7 - Not used for VP9
5127 if (m_enableTileStitchByHW)
5128 {
5129 virtualAddrParams.regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][GetCurrentPass()]; // Region 8 - data buffer read by HUC for stitching cmd generation
5130 virtualAddrParams.regionParams[8].isWritable = true;
5131 }
5132 virtualAddrParams.regionParams[9].presRegion = &m_hucPakIntBrcDataBuffer; // Region 9 - HuC outputs BRC data
5133 virtualAddrParams.regionParams[9].isWritable = true;
5134 if (m_enableTileStitchByHW)
5135 {
5136 virtualAddrParams.regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
5137 virtualAddrParams.regionParams[10].isWritable = true;
5138 }
5139 virtualAddrParams.regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
5140 virtualAddrParams.regionParams[15].dwOffset = 0;
5141
5142 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
5143
5144 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
5145
5146 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(cmdBuffer, true));
5147
5148 // wait Huc completion (use HEVC bit for now)
5149 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5150 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5151 vdPipeFlushParams.Flags.bFlushHEVC = 1;
5152 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5153 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
5154
5155 // Flush the engine to ensure memory written out
5156 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5157 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5158 flushDwParams.bVideoPipelineCacheInvalidate = true;
5159 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
5160
5161 auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
5162 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
5163 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
5164
5165 CODECHAL_DEBUG_TOOL(
5166 // Dump input Pak Integration buffers before running HuC
5167 m_debugInterface->DumpHucRegion(
5168 virtualAddrParams.regionParams[0].presRegion,
5169 0,
5170 hucRegionSize[0],
5171 0,
5172 "_PakIntStitchBuffer",
5173 (virtualAddrParams.regionParams[0].isWritable ? true : false),
5174 GetCurrentPass(),
5175 CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
5176
5177 m_debugInterface->DumpHucDmem(
5178 &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()],
5179 sizeof(HucPakIntDmem),
5180 GetCurrentPass(),
5181 CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
5182
5183 for (auto i = 0; i < 16; i++)
5184 {
5185 if (virtualAddrParams.regionParams[i].presRegion)
5186 {
5187 if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
5188 {
5189 continue;
5190 }
5191 m_debugInterface->DumpHucRegion(
5192 virtualAddrParams.regionParams[i].presRegion,
5193 virtualAddrParams.regionParams[i].dwOffset,
5194 hucRegionSize[i],
5195 i,
5196 hucRegionName[i],
5197 !virtualAddrParams.regionParams[i].isWritable,
5198 GetCurrentPass(),
5199 CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
5200 }
5201 }
5202 )
5203
5204 return eStatus;
5205 }
5206
ConstructPicStateBatchBuf(PMOS_RESOURCE picStateBuffer)5207 MOS_STATUS CodechalVdencVp9StateG12::ConstructPicStateBatchBuf(
5208 PMOS_RESOURCE picStateBuffer)
5209 {
5210 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5211
5212 CODECHAL_ENCODE_FUNCTION_ENTER;
5213
5214 CODECHAL_ENCODE_CHK_NULL_RETURN(picStateBuffer);
5215
5216 MOS_COMMAND_BUFFER cmdBuffer;
5217 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5218
5219 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
5220 {
5221 // Send command buffer header at the beginning (OS dependent)
5222 bool requestFrameTracking = false;
5223 if (!m_vp9PicParams->PicFlags.fields.super_frame) {
5224 requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
5225 }
5226 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5227 m_firstTaskInPhase = false;
5228 }
5229
5230 ReturnCommandBuffer(&cmdBuffer);
5231
5232 MOS_LOCK_PARAMS lockFlagsWriteOnly;
5233 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
5234 lockFlagsWriteOnly.WriteOnly = 1;
5235 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, picStateBuffer, &lockFlagsWriteOnly);
5236 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
5237
5238 MOS_COMMAND_BUFFER constructedCmdBuf;
5239 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
5240 constructedCmdBuf.pCmdBase = (uint32_t *)data;
5241 constructedCmdBuf.pCmdPtr = (uint32_t *)data;
5242 constructedCmdBuf.iOffset = 0;
5243 constructedCmdBuf.iRemaining = m_vdencPicStateSecondLevelBatchBufferSize;
5244
5245 eStatus = AddCommandsVp9(CODECHAL_CMD1, &constructedCmdBuf);
5246 if (eStatus != MOS_STATUS_SUCCESS)
5247 {
5248 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5249 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add CODECHAL_CMD1 command.");
5250 return eStatus;
5251 }
5252
5253 // HCP_VP9_PIC_STATE
5254 MHW_VDBOX_VP9_ENCODE_PIC_STATE picState;
5255 MOS_ZeroMemory(&picState, sizeof(picState));
5256 picState.pVp9PicParams = m_vp9PicParams;
5257 picState.pVp9SeqParams = m_vp9SeqParams;
5258 picState.ppVp9RefList = &(m_refList[0]);
5259 picState.PrevFrameParams.fields.KeyFrame = m_prevFrameInfo.KeyFrame;
5260 picState.PrevFrameParams.fields.IntraOnly = m_prevFrameInfo.IntraOnly;
5261 picState.PrevFrameParams.fields.Display = m_prevFrameInfo.ShowFrame;
5262 picState.dwPrevFrmWidth = m_prevFrameInfo.FrameWidth;
5263 picState.dwPrevFrmHeight = m_prevFrameInfo.FrameHeight;
5264 picState.ucTxMode = m_txMode;
5265 picState.bSSEEnable = m_vdencBrcEnabled;
5266 picState.bUseDysRefSurface = (m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled;
5267 picState.bVdencPakOnlyPassFlag = m_vdencPakonlyMultipassEnabled;
5268 picState.uiMaxBitRate = m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
5269 picState.uiMinBitRate = m_vp9SeqParams->MinBitRate * CODECHAL_ENCODE_BRC_KBPS;
5270 m_hucPicStateOffset = (uint16_t)constructedCmdBuf.iOffset;
5271
5272 eStatus = m_hcpInterface->AddHcpVp9PicStateEncCmd(&constructedCmdBuf, nullptr, &picState);
5273 if (eStatus != MOS_STATUS_SUCCESS)
5274 {
5275 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5276 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add HCP_VP9_PIC_STATE command.");
5277 return eStatus;
5278 }
5279
5280 // HCP_VP9_SEGMENT_STATE
5281 MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
5282 MOS_ZeroMemory(&segmentState, sizeof(segmentState));
5283 segmentState.Mode = m_mode;
5284 segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
5285 uint8_t segmentCount = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
5286
5287 for (uint8_t i = 0; i < segmentCount; i++)
5288 {
5289 segmentState.ucCurrentSegmentId = i;
5290 eStatus = m_hcpInterface->AddHcpVp9SegmentStateCmd(&constructedCmdBuf, nullptr, &segmentState);
5291 if (eStatus != MOS_STATUS_SUCCESS)
5292 {
5293 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5294 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MHW_VDBOX_VP9_SEGMENT_STATE command.");
5295 return eStatus;
5296 }
5297 }
5298
5299 // Adjust cmd buffer offset to have 8 segment state blocks
5300 if (segmentCount < CODEC_VP9_MAX_SEGMENTS)
5301 {
5302 // Max 7 segments, 32 bytes each
5303 uint8_t zeroBlock[m_segmentStateBlockSize * (CODEC_VP9_MAX_SEGMENTS - 1)];
5304 MOS_ZeroMemory(zeroBlock, sizeof(zeroBlock));
5305 Mhw_AddCommandCmdOrBB(m_osInterface, &constructedCmdBuf, nullptr, zeroBlock, (CODEC_VP9_MAX_SEGMENTS - segmentCount) * m_segmentStateBlockSize);
5306 }
5307
5308 m_slbbImgStateOffset = (uint16_t)constructedCmdBuf.iOffset;
5309 eStatus = AddCommandsVp9(CODECHAL_CMD2, &constructedCmdBuf);
5310 if (eStatus != MOS_STATUS_SUCCESS)
5311 {
5312 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5313 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add CODECHAL_CMD2 command.");
5314 return eStatus;
5315 }
5316
5317 // BB_END
5318 eStatus = m_miInterface->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr);
5319 if (eStatus != MOS_STATUS_SUCCESS)
5320 {
5321 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5322 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MI Batch Buffer End command.");
5323 return eStatus;
5324 }
5325 m_hucSlbbSize = (uint16_t)constructedCmdBuf.iOffset;
5326
5327 m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5328
5329 return eStatus;
5330 }
5331
HuCVp9Prob()5332 MOS_STATUS CodechalVdencVp9StateG12::HuCVp9Prob()
5333 {
5334 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5335
5336 CODECHAL_ENCODE_FUNCTION_ENTER;
5337 if (!IsFirstPipe())
5338 {
5339 return eStatus;
5340 }
5341
5342 CODECHAL_DEBUG_TOOL(
5343 uint32_t hucRegionSize[16] = { 0 };
5344 const char* hucRegionName[16] = { "\0" };
5345
5346 hucRegionName[0] = "_UpdatedProbBuffer"; // hucRegionName[0] is used to dump region 0 after HuC is run, which has updated probabilities. Input Region 0 is dumped separetely before HuC.
5347 hucRegionSize[0] = 32 * CODECHAL_CACHELINE_SIZE;
5348 hucRegionName[1] = "_CountersBuffer";
5349 hucRegionSize[1] = 193 * CODECHAL_CACHELINE_SIZE;
5350 hucRegionName[2] = "_ProbBuffer";
5351 hucRegionSize[2] = 32 * CODECHAL_CACHELINE_SIZE;
5352 hucRegionName[3] = "_ProbDeltaBuffer";
5353 hucRegionSize[3] = 29 * CODECHAL_CACHELINE_SIZE;
5354 hucRegionName[4] = "_UncompressedHdr";
5355 hucRegionSize[4] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
5356 hucRegionName[5] = "_CompressedHdr";
5357 hucRegionSize[5] = 32 * CODECHAL_CACHELINE_SIZE;
5358 hucRegionName[6] = "_SecondLevelBatchBuffer";
5359 hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
5360 hucRegionName[7] = "_SecondLevelBatchBuffer";
5361 hucRegionSize[7] = m_vdencPicStateSecondLevelBatchBufferSize;
5362 hucRegionName[8] = "_UncompressedHdr";
5363 hucRegionSize[8] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
5364 hucRegionName[9] = "_DefaultProbs";
5365 hucRegionSize[9] = sizeof(Keyframe_Default_Probs) + sizeof(Inter_Default_Probs);
5366 hucRegionName[10] = "_SuperFrameBuffer";
5367 hucRegionSize[10] = CODECHAL_ENCODE_VP9_BRC_SUPER_FRAME_BUFFER_SIZE;
5368 hucRegionName[11] = "_DataExtension";
5369 hucRegionSize[11] = CODECHAL_ENCODE_VP9_VDENC_DATA_EXTENSION_SIZE;
5370 )
5371
5372 MOS_COMMAND_BUFFER cmdBuffer;
5373 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5374
5375 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
5376 {
5377 bool requestFrameTracking = false;
5378 // Send command buffer header at the beginning (OS dependent)
5379 // frame tracking tag is only added in the last command buffer header
5380 requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
5381 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5382 m_firstTaskInPhase = false;
5383 }
5384 // Collect of HuC BRC Update kernel performance data
5385 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5386
5387 int currPass = GetCurrentPass();
5388 if (m_scalableMode && m_isTilingSupported)
5389 {
5390 // Define huc done semaphore to be empty at the start
5391 for (auto i = 0; i < m_numPipe; i++)
5392 {
5393 SetSemaphoreMem(&m_hucDoneSemaphoreMem[i].sResource, &cmdBuffer, 0);
5394 }
5395 // Wait here for pak int done from previous pass
5396 if (IsLastPass())
5397 {
5398 SendHWWaitCommand(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, currPass);
5399 SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, 0);
5400 }
5401 }
5402
5403 // load kernel from WOPCM into L2 storage RAM
5404 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5405 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5406 imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencProbKernelDescriptor;
5407 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5408
5409 // pipe mode select
5410 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5411 pipeModeSelectParams.Mode = m_mode;
5412 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5413
5414 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCVp9Prob());
5415
5416 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5417 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5418 dmemParams.presHucDataSource = &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx];
5419 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucProbDmem), CODECHAL_CACHELINE_SIZE);
5420 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5421 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5422
5423 // Add Virtual addr
5424 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5425 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5426 // Input regions
5427 virtualAddrParams.regionParams[0].presRegion = &m_resProbBuffer[m_vp9PicParams->PicFlags.fields.frame_context_idx];
5428 virtualAddrParams.regionParams[0].isWritable = true; // Region 0 is both read and write for HuC. Has input probabilities before running HuC and updated probabilities after running HuC, which will then be input to next pass
5429 if (m_scalableMode)
5430 {
5431 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5432 virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.counterBuffer;
5433 }
5434 else
5435 {
5436 virtualAddrParams.regionParams[1].presRegion = &m_resProbabilityCounterBuffer;
5437 virtualAddrParams.regionParams[1].dwOffset = 0;
5438 }
5439 // If BRC enabled, BRC Pass 2 output SLBB -> input SLBB for HPU on pass 2 (HPU pass 1 and 3. BRC Update pass 1 and 2)
5440 // BRC Pass 1 output SLBB -> input SLBB for HPU on pass 1
5441 // If BRC not on, Driver prepared SLBB -> input to HPU on both passes
5442
5443 if (m_vdencBrcEnabled)
5444 {
5445 virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
5446 }
5447 else
5448 {
5449 virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
5450 }
5451
5452 virtualAddrParams.regionParams[8].presRegion = &m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
5453 virtualAddrParams.regionParams[9].presRegion = &m_resHucDefaultProbBuffer;
5454
5455 // Output regions
5456 virtualAddrParams.regionParams[2].presRegion = &m_resHucProbOutputBuffer; // Final probability output from HuC after each pass
5457 virtualAddrParams.regionParams[2].isWritable = true;
5458 virtualAddrParams.regionParams[3].presRegion = &m_resProbabilityDeltaBuffer;
5459 virtualAddrParams.regionParams[3].isWritable = true;
5460 virtualAddrParams.regionParams[4].presRegion = &m_resHucPakInsertUncompressedHeaderWriteBuffer;
5461 virtualAddrParams.regionParams[4].isWritable = true;
5462 virtualAddrParams.regionParams[5].presRegion = &m_resCompressedHeaderBuffer;
5463 virtualAddrParams.regionParams[5].isWritable = true;
5464 virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
5465 virtualAddrParams.regionParams[6].isWritable = true;
5466 virtualAddrParams.regionParams[10].presRegion = &m_resBitstreamBuffer;
5467 virtualAddrParams.regionParams[10].isWritable = true;
5468 virtualAddrParams.regionParams[11].presRegion = &m_resVdencDataExtensionBuffer;
5469 virtualAddrParams.regionParams[11].isWritable = true;
5470
5471 m_hpuVirtualAddrParams = virtualAddrParams;
5472 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
5473 // Store HUC_STATUS2 register bit 6 before HUC_Start command
5474 // This bit will be cleared by HW at the end of a HUC workload
5475 // (HUC_Start command with last start bit set).
5476 CODECHAL_DEBUG_TOOL(
5477 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
5478 )
5479
5480 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
5481
5482 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
5483
5484 // wait Huc completion (use HEVC bit for now)
5485 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5486 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5487 vdPipeFlushParams.Flags.bFlushHEVC = 1;
5488 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5489 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
5490
5491 // Flush the engine to ensure memory written out
5492 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5493 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5494 flushDwParams.bVideoPipelineCacheInvalidate = true;
5495 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
5496
5497 // Write HUC_STATUS mask: DW1 (mask value)
5498 MHW_MI_STORE_DATA_PARAMS storeDataParams;
5499 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
5500 storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
5501 storeDataParams.dwResourceOffset = sizeof(uint32_t);
5502 storeDataParams.dwValue = 1 << 31; //Repak bit for HUC is bit 31
5503 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
5504
5505 // store HUC_STATUS register
5506 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
5507 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
5508 storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
5509 storeRegParams.dwOffset = 0;
5510 storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
5511 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
5512
5513 auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
5514 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
5515 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
5516
5517 // In case of other pipes running other tiles, signal the vdenc/pak hw commands there to proceed because huc done
5518 if (m_scalableMode && m_isTilingSupported)
5519 {
5520 for (auto i = 1; i < m_numPipe; i++)
5521 {
5522 if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[i].sResource))
5523 {
5524 CODECHAL_ENCODE_CHK_STATUS_RETURN(
5525 SetSemaphoreMem(
5526 &m_hucDoneSemaphoreMem[i].sResource,
5527 &cmdBuffer,
5528 (currPass + 1))
5529 );
5530 }
5531 }
5532 }
5533
5534 // For superframe pass, after HuC executes, write the updated size (combined frame size) to status report
5535 // So app knows total size instead of just the showframe size
5536 if (m_superFrameHucPass)
5537 {
5538 EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
5539 uint32_t baseOffset =
5540 (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
5541 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
5542
5543 MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams;
5544 MOS_ZeroMemory(©MemMemParams, sizeof(copyMemMemParams));
5545
5546 copyMemMemParams.presSrc = virtualAddrParams.regionParams[11].presRegion;
5547 copyMemMemParams.dwSrcOffset = 0; // Updated framesize is 1st DW in buffer
5548 copyMemMemParams.presDst = &encodeStatusBuf->resStatusBuffer;
5549 copyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset;
5550
5551 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(
5552 &cmdBuffer,
5553 ©MemMemParams));
5554 }
5555 // Ending collect of HuC BRC Update kernel performance data
5556 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5557
5558 if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
5559 {
5560 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5561 }
5562
5563 // Dump input probabilites before running HuC
5564 CODECHAL_DEBUG_TOOL(
5565 CodechalHucRegionDumpType dumpType = m_superFrameHucPass ? CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame : CodechalHucRegionDumpType::hucRegionDumpHpu;
5566 m_debugInterface->DumpHucRegion(
5567 virtualAddrParams.regionParams[0].presRegion,
5568 0,
5569 hucRegionSize[0],
5570 0,
5571 "_ProbBuffer",
5572 (virtualAddrParams.regionParams[0].isWritable ? true : false),
5573 currPass,
5574 dumpType);
5575 )
5576
5577 ReturnCommandBuffer(&cmdBuffer);
5578
5579 // For Temporal scaling, super frame pass is initiated after the command buffer submission in ExecuteSliceLevel.
5580 // So if Single Task Phase is enabled, then we need to explicitly submit the command buffer here to call HuC
5581 if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
5582 {
5583 bool renderFlags = m_videoContextUsesNullHw;
5584
5585 CODECHAL_DEBUG_TOOL(
5586 std::string nameCmdPass = (m_superFrameHucPass ? "HPU_SuperFramePass" : "HPU_Pass") + std::to_string(currPass);
5587
5588 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5589 &cmdBuffer,
5590 CODECHAL_NUM_MEDIA_STATES,
5591 nameCmdPass.c_str()));
5592 )
5593
5594 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5595 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, m_vp9PicParams->PicFlags.fields.super_frame));
5596 ReturnCommandBuffer(&cmdBuffer);
5597
5598 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
5599
5600 CODECHAL_DEBUG_TOOL(
5601 CodechalHucRegionDumpType dumpType = m_superFrameHucPass ? CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame : CodechalHucRegionDumpType::hucRegionDumpHpu;
5602 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5603 &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
5604 sizeof(HucProbDmem),
5605 currPass,
5606 dumpType));
5607
5608 for (auto i = 0; i < 16; i++)
5609 {
5610 if (virtualAddrParams.regionParams[i].presRegion)
5611 {
5612 if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
5613 {
5614 continue;
5615 }
5616 m_debugInterface->DumpHucRegion(
5617 virtualAddrParams.regionParams[i].presRegion,
5618 virtualAddrParams.regionParams[i].dwOffset,
5619 hucRegionSize[i],
5620 i,
5621 hucRegionName[i],
5622 !virtualAddrParams.regionParams[i].isWritable,
5623 currPass,
5624 dumpType);
5625 }
5626 })
5627 }
5628
5629 return eStatus;
5630 }
5631
5632 /*----------------------------------------------------------------------------
5633 | Name : HuCBrcInitReset
5634 | Purpose : Start/Submit VP9 HuC BrcInit kernel to HW
5635 |
5636 | Returns : MOS_STATUS
5637 \---------------------------------------------------------------------------*/
HuCBrcInitReset()5638 MOS_STATUS CodechalVdencVp9StateG12::HuCBrcInitReset()
5639 {
5640 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5641
5642 CODECHAL_ENCODE_FUNCTION_ENTER;
5643
5644 int currPass = GetCurrentPass();
5645
5646 CODECHAL_DEBUG_TOOL(
5647 uint32_t hucRegionSize[16];
5648 const char* hucRegionName[16];
5649
5650 hucRegionName[0] = "_BrcHistoryBuffer";
5651 hucRegionSize[0] = m_brcHistoryBufferSize;
5652 )
5653
5654 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5655 #if (_DEBUG || _RELEASE_INTERNAL)
5656 if (m_swBrcMode)
5657 {
5658 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
5659 CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(false));
5660 // Set region params for dumping only
5661 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5662 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5663 virtualAddrParams.regionParams[0].isWritable = true;
5664 m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
5665 m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
5666
5667 CODECHAL_DEBUG_TOOL(
5668 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5669 &m_resVdencBrcInitDmemBuffer,
5670 sizeof(HucBrcInitDmem),
5671 0,
5672 CodechalHucRegionDumpType::hucRegionDumpInit));
5673
5674 for (auto i = 0; i < 16; i++)
5675 {
5676 if (virtualAddrParams.regionParams[i].presRegion)
5677 {
5678 m_debugInterface->DumpHucRegion(
5679 virtualAddrParams.regionParams[i].presRegion,
5680 virtualAddrParams.regionParams[i].dwOffset,
5681 hucRegionSize[i],
5682 i,
5683 hucRegionName[i],
5684 !virtualAddrParams.regionParams[i].isWritable,
5685 currPass,
5686 CodechalHucRegionDumpType::hucRegionDumpInit);
5687 }
5688 }
5689 )
5690 return eStatus;
5691 }
5692 #endif
5693 MOS_COMMAND_BUFFER cmdBuffer;
5694 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5695
5696 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
5697 {
5698 // Send command buffer header at the beginning (OS dependent)
5699 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : false;
5700 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5701
5702 m_firstTaskInPhase = false;
5703 }
5704 // Collect HuC Init/Reset kernel performance data
5705 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5706
5707 // load kernel from WOPCM into L2 storage RAM
5708 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5709 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5710 imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcInitKernelDescriptor;
5711 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5712
5713 // pipe mode select
5714 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5715 pipeModeSelectParams.Mode = m_mode;
5716 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5717
5718 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
5719
5720 m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
5721 m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
5722
5723 // set HuC DMEM param
5724 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5725 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5726 dmemParams.presHucDataSource = &m_resVdencBrcInitDmemBuffer;
5727 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcInitDmem), CODECHAL_CACHELINE_SIZE);
5728 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5729 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5730
5731 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5732 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5733 virtualAddrParams.regionParams[0].isWritable = true;
5734 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
5735
5736 // Store HUC_STATUS2 register bit 6 before HUC_Start command
5737 // This bit will be cleared by HW at the end of a HUC workload
5738 // (HUC_Start command with last start bit set).
5739 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
5740
5741 // HuC Status 2 report in Status Report
5742 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
5743
5744 // Check HuC_STATUS2 bit6, if bit6 > 0 HW continue execution following cmd, otherwise it send a COND BB END cmd.
5745 uint32_t compareOperation = mhw_mi_g12_X::MI_CONDITIONAL_BATCH_BUFFER_END_CMD::COMPARE_OPERATION_MADGREATERTHANIDD;
5746 auto hwInterface = dynamic_cast<CodechalHwInterfaceG12 *>(m_hwInterface);
5747 CODECHAL_ENCODE_CHK_NULL_RETURN(hwInterface);
5748 CODECHAL_ENCODE_CHK_STATUS_RETURN(hwInterface->SendCondBbEndCmd(
5749 &m_resHucStatus2Buffer, 0, 0, false, false, compareOperation, &cmdBuffer));
5750
5751 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
5752
5753 // wait Huc completion (use HEVC bit for now)
5754 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5755 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5756 vdPipeFlushParams.Flags.bFlushHEVC = 1;
5757 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5758 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
5759
5760 // Flush the engine to ensure memory written out
5761 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5762 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5763 flushDwParams.bVideoPipelineCacheInvalidate = true;
5764 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
5765
5766 auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
5767 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
5768 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
5769 // End: Collect HuC Init/Reset kernel performance data
5770 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5771
5772 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
5773 {
5774 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5775 }
5776
5777 ReturnCommandBuffer(&cmdBuffer);
5778
5779 if (!m_singleTaskPhaseSupported)
5780 {
5781 bool renderingFlags = m_videoContextUsesNullHw;
5782
5783 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
5784
5785 CODECHAL_DEBUG_TOOL(
5786 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5787 &m_resVdencBrcInitDmemBuffer,
5788 sizeof(HucBrcInitDmem),
5789 0,
5790 CodechalHucRegionDumpType::hucRegionDumpInit));
5791
5792 for (auto i = 0; i < 16; i++)
5793 {
5794 if (virtualAddrParams.regionParams[i].presRegion)
5795 {
5796 m_debugInterface->DumpHucRegion(
5797 virtualAddrParams.regionParams[i].presRegion,
5798 virtualAddrParams.regionParams[i].dwOffset,
5799 hucRegionSize[i],
5800 i,
5801 hucRegionName[i],
5802 !virtualAddrParams.regionParams[i].isWritable,
5803 0,
5804 CodechalHucRegionDumpType::hucRegionDumpInit);
5805 }
5806 }
5807 )
5808 }
5809
5810 return eStatus;
5811 }
5812
5813 /*----------------------------------------------------------------------------
5814 | Name : HuCBrcUpdate
5815 | Purpose : Start/Submit VP9 HuC BrcUpdate kernel to HW
5816 |
5817 | Returns : MOS_STATUS
5818 \---------------------------------------------------------------------------*/
HuCBrcUpdate()5819 MOS_STATUS CodechalVdencVp9StateG12::HuCBrcUpdate()
5820 {
5821 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5822
5823 CODECHAL_ENCODE_FUNCTION_ENTER;
5824
5825 int currPass = GetCurrentPass();
5826
5827 CODECHAL_DEBUG_TOOL(
5828 uint32_t hucRegionSize[16];
5829 const char* hucRegionName[16];
5830
5831 hucRegionName[0] = "_BrcHistory";
5832 hucRegionSize[0] = m_brcHistoryBufferSize;
5833 hucRegionName[1] = "_VDEncStats";
5834 hucRegionSize[1] = m_vdencBrcStatsBufferSize;
5835 hucRegionName[2] = "_PAKStats";
5836 hucRegionSize[2] = m_vdencBrcPakStatsBufferSize;
5837 hucRegionName[3] = "_InputSLBB";
5838 hucRegionSize[3] = m_vdencPicStateSecondLevelBatchBufferSize;
5839 hucRegionName[4] = "_BRCData";
5840 hucRegionSize[4] = CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE;
5841 hucRegionName[5] = "_ConstData";
5842 hucRegionSize[5] = m_brcConstantSurfaceSize;
5843 hucRegionName[6] = "_OutputSLBB";
5844 hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
5845 hucRegionName[7] = "_PAKMMIO";
5846 hucRegionSize[7] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
5847 )
5848
5849 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5850 #if (_DEBUG || _RELEASE_INTERNAL)
5851 if (m_swBrcMode)
5852 {
5853 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
5854 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
5855 // Set region params for dumping only
5856 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5857 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5858 virtualAddrParams.regionParams[0].isWritable = true;
5859 virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
5860 virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
5861 virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
5862 virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
5863 virtualAddrParams.regionParams[4].isWritable = true;
5864 virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
5865 virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
5866 virtualAddrParams.regionParams[6].isWritable = true;
5867 virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
5868 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(// Dump history IN since it's both IN/OUT, OUT will dump at end of function, rest of buffers are IN XOR OUT (not both)
5869 virtualAddrParams.regionParams[0].presRegion,
5870 virtualAddrParams.regionParams[0].dwOffset,
5871 hucRegionSize[0],
5872 0,
5873 hucRegionName[0],
5874 true,
5875 currPass,
5876 CodechalHucRegionDumpType::hucRegionDumpUpdate));
5877 CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(true));
5878
5879 CODECHAL_DEBUG_TOOL(
5880 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5881 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
5882 sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
5883 currPass,
5884 CodechalHucRegionDumpType::hucRegionDumpUpdate));
5885
5886 for (auto i = 0; i < 16; i++)
5887 {
5888 if (virtualAddrParams.regionParams[i].presRegion)
5889 {
5890 m_debugInterface->DumpHucRegion(
5891 virtualAddrParams.regionParams[i].presRegion,
5892 virtualAddrParams.regionParams[i].dwOffset,
5893 hucRegionSize[i],
5894 i,
5895 hucRegionName[i],
5896 !virtualAddrParams.regionParams[i].isWritable,
5897 currPass,
5898 CodechalHucRegionDumpType::hucRegionDumpUpdate);
5899 }
5900 }
5901 );
5902 // We increment by the average frame value once for each frame
5903 if (IsFirstPass())
5904 {
5905 m_curTargetFullness += m_inputBitsPerFrame;
5906 }
5907
5908 return eStatus;
5909 }
5910 #endif
5911
5912 MOS_COMMAND_BUFFER cmdBuffer;
5913 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5914
5915 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && !m_scalableMode)
5916 {
5917 // Send command buffer header at the beginning (OS dependent)
5918 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : false;
5919 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5920
5921 m_firstTaskInPhase = false;
5922 }
5923 // Collect HuC BRC Update kernel performance data
5924 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5925
5926 // For Scalability, wait here for previous pass PAK int done
5927 if (m_scalableMode && !IsFirstPass() && m_isTilingSupported && !m_brcInit && !m_brcReset)
5928 {
5929 SendHWWaitCommand(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, currPass);
5930 SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, 0);
5931 }
5932
5933 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
5934
5935 // load kernel from WOPCM into L2 storage RAM
5936 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5937 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5938 imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcUpdateKernelDescriptor;
5939 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5940
5941 // pipe mode select
5942 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5943 pipeModeSelectParams.Mode = m_mode;
5944 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5945
5946 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
5947
5948 // set HuC DMEM param
5949 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5950 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5951 dmemParams.presHucDataSource = &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx];
5952 dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcUpdateDmem), CODECHAL_CACHELINE_SIZE);
5953 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; // how to set?
5954 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5955
5956 // Set surfaces to HuC regions
5957 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5958
5959 // History Buffer - IN/OUT
5960 virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5961 virtualAddrParams.regionParams[0].isWritable = true;
5962
5963 if (IsFirstPass()) //First BRC pass needs stats from last frame
5964 {
5965 if (m_lastFrameScalableMode) // Frame (n-1) Scalable mode output -> input for frame n, BRC pass 0
5966 {
5967 //VDEnc Stats Buffer - IN
5968 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5969 virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.vdencStats;
5970 // Frame (not PAK) Stats Buffer - IN
5971 virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5972 virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
5973 // PAK MMIO - IN
5974 virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
5975 }
5976 else
5977 {
5978 virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
5979 virtualAddrParams.regionParams[1].dwOffset = 0;
5980 virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
5981 virtualAddrParams.regionParams[2].dwOffset = 0;
5982 virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
5983 }
5984 }
5985 else // Second BRC Update Pass
5986 {
5987 if (m_scalableMode)
5988 {
5989 // VDEnc Stats Buffer - IN
5990 virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5991 virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.vdencStats;
5992 // Frame (not PAK) Stats Buffer - IN
5993 virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5994 virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
5995 // PAK MMIO - IN
5996 virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
5997 }
5998 else
5999 {
6000 virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
6001 virtualAddrParams.regionParams[1].dwOffset = 0;
6002 virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
6003 virtualAddrParams.regionParams[2].dwOffset = 0;
6004 virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
6005 }
6006 }
6007
6008 // Input SLBB (second level batch buffer) - IN
6009 virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
6010
6011 // BRC Data - OUT
6012 virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
6013 virtualAddrParams.regionParams[4].isWritable = true;
6014
6015 // Const Data - IN
6016 virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
6017
6018 // Output SLBB - OUT
6019 virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
6020 virtualAddrParams.regionParams[6].isWritable = true;
6021
6022 // Load HuC Regions into Cmd Buf
6023 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
6024
6025 // Store HUC_STATUS2 register bit 6 before HUC_Start command
6026 // This bit will be cleared by HW at the end of a HUC workload
6027 // (HUC_Start command with last start bit set).
6028 CODECHAL_DEBUG_TOOL(
6029 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
6030 )
6031
6032 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6033
6034 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6035
6036 // wait Huc completion (use HEVC bit for now)
6037 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6038 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6039 vdPipeFlushParams.Flags.bFlushHEVC = 1;
6040 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6041 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6042
6043 // Flush the engine to ensure memory written out
6044 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6045 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6046 flushDwParams.bVideoPipelineCacheInvalidate = true;
6047 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6048
6049 MHW_MI_STORE_DATA_PARAMS storeDataParams;
6050 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6051 storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
6052 storeDataParams.dwResourceOffset = sizeof(uint32_t);
6053 storeDataParams.dwValue = 1 << 31;
6054 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
6055
6056 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
6057 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
6058 storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
6059 storeRegParams.dwOffset = 0;
6060 storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
6061 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
6062
6063 auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
6064 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
6065 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
6066
6067 // Ending collect of HuC BRC Update kernel performance data
6068 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
6069
6070 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
6071 {
6072 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6073 }
6074
6075 ReturnCommandBuffer(&cmdBuffer);
6076
6077 if (!m_singleTaskPhaseSupported)
6078 {
6079 bool renderingFlags = m_videoContextUsesNullHw;
6080
6081 // Dump history input before HuC runs
6082 CODECHAL_DEBUG_TOOL(
6083 m_debugInterface->DumpHucRegion(
6084 virtualAddrParams.regionParams[0].presRegion,
6085 0,
6086 hucRegionSize[0],
6087 0,
6088 hucRegionName[0],
6089 true,
6090 currPass,
6091 CodechalHucRegionDumpType::hucRegionDumpUpdate);
6092 );
6093
6094 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6095
6096 CODECHAL_DEBUG_TOOL(
6097 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
6098 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
6099 sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
6100 currPass,
6101 CodechalHucRegionDumpType::hucRegionDumpUpdate));
6102
6103 for (auto i = 0; i < 16; i++)
6104 {
6105 if (virtualAddrParams.regionParams[i].presRegion)
6106 {
6107 m_debugInterface->DumpHucRegion(
6108 virtualAddrParams.regionParams[i].presRegion,
6109 virtualAddrParams.regionParams[i].dwOffset,
6110 hucRegionSize[i],
6111 i,
6112 hucRegionName[i],
6113 !virtualAddrParams.regionParams[i].isWritable,
6114 currPass,
6115 CodechalHucRegionDumpType::hucRegionDumpUpdate);
6116 }
6117 }
6118 )
6119 }
6120
6121 // We increment by the average frame value once for each frame
6122 if (IsFirstPass())
6123 {
6124 m_curTargetFullness += m_inputBitsPerFrame;
6125 }
6126
6127 return eStatus;
6128 }
6129
InitMmcState()6130 MOS_STATUS CodechalVdencVp9StateG12::InitMmcState()
6131 {
6132 CODECHAL_ENCODE_FUNCTION_ENTER;
6133 #ifdef _MMC_SUPPORTED
6134 m_mmcState = MOS_New(CodechalMmcEncodeVp9G12, m_hwInterface, &m_reconSurface, &m_rawSurface);
6135 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
6136 #endif
6137 return MOS_STATUS_SUCCESS;
6138 }
6139
AddCommandsVp9(uint32_t commandType,PMOS_COMMAND_BUFFER cmdBuffer)6140 MOS_STATUS CodechalVdencVp9StateG12::AddCommandsVp9(uint32_t commandType, PMOS_COMMAND_BUFFER cmdBuffer )
6141 {
6142 auto qp = m_vp9PicParams->LumaACQIndex;
6143 auto vp9FrameType = m_vp9PicParams->PicFlags.fields.frame_type;
6144 double QPScale = (vp9FrameType == CODEC_VP9_KEY_FRAME) ? 0.31 : 0.33;
6145 double lambda = QPScale * CODECHAL_VP9_QUANT_AC[qp] / 8;
6146
6147 auto sadQpLambda = lambda * 4 + 0.5;
6148 auto rdQpLambda = lambda * lambda *4 + 0.5;
6149
6150 if (commandType == CODECHAL_CMD1)
6151 {
6152 MHW_VDBOX_VDENC_CMD1_PARAMS cmd1Params;
6153 MOS_ZeroMemory(&cmd1Params, sizeof(cmd1Params));
6154 cmd1Params.Mode = CODECHAL_ENCODE_MODE_VP9;
6155 cmd1Params.usSADQPLambda = (uint16_t)sadQpLambda;
6156 cmd1Params.usRDQPLambda = (uint16_t)rdQpLambda;
6157 cmd1Params.pVp9EncPicParams = m_vp9PicParams;
6158 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd1Cmd(cmdBuffer, nullptr, &cmd1Params));
6159
6160 }
6161 else if (commandType == CODECHAL_CMD2)
6162 {
6163 PMHW_VDBOX_VDENC_CMD2_STATE cmd2Params(new MHW_VDBOX_VDENC_CMD2_STATE);
6164 CODECHAL_ENCODE_CHK_NULL_RETURN(cmd2Params);
6165
6166 MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
6167 MOS_ZeroMemory(&segmentState, sizeof(segmentState));
6168 segmentState.Mode = m_mode;
6169 segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
6170 cmd2Params->Mode = m_mode;
6171 cmd2Params->pVp9EncPicParams = m_vp9PicParams;
6172 cmd2Params->pVp9EncSeqParams = m_vp9SeqParams;
6173 cmd2Params->bSegmentationEnabled = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
6174 cmd2Params->pVp9SegmentState = &segmentState;
6175 cmd2Params->bPrevFrameSegEnabled = m_prevFrameSegEnabled;
6176 cmd2Params->bStreamInEnabled = m_segmentMapProvided || m_16xMeEnabled;
6177 cmd2Params->ucNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
6178 cmd2Params->usSADQPLambda = (uint16_t)sadQpLambda;
6179 cmd2Params->usRDQPLambda = (uint16_t)rdQpLambda;
6180 cmd2Params->bPakOnlyMultipassEnable = m_vdencPakonlyMultipassEnabled;
6181 cmd2Params->bDynamicScalingEnabled = ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled);
6182 cmd2Params->temporalMVpEnable = m_vp9PicParams->PicFlags.fields.frame_type && !m_prevFrameInfo.KeyFrame;
6183 if ((m_vp9PicParams->RefFlags.fields.LastRefIdx == m_vp9PicParams->RefFlags.fields.AltRefIdx
6184 && m_vp9PicParams->RefFlags.fields.AltRefIdx == m_vp9PicParams->RefFlags.fields.GoldenRefIdx
6185 && m_vp9PicParams->RefFlags.fields.GoldenRefIdx == m_vp9PicParams->RefFlags.fields.LastRefIdx)
6186 || (m_vp9SeqParams->TargetUsage == TU_QUALITY)) {
6187 cmd2Params->temporalMVpEnable = 0;
6188 }
6189 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd2Cmd(cmdBuffer, nullptr, cmd2Params));
6190 }
6191
6192 return MOS_STATUS_SUCCESS;
6193 }
6194
ConfigStitchDataBuffer()6195 MOS_STATUS CodechalVdencVp9StateG12::ConfigStitchDataBuffer()
6196 {
6197 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6198 CODECHAL_ENCODE_FUNCTION_ENTER;
6199 int32_t currentPass = GetCurrentPass();
6200
6201 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6202 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6203 lockFlagsWriteOnly.WriteOnly = 1;
6204
6205 HucCommandData *hucStitchDataBuf = (HucCommandData *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6206 CODECHAL_ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
6207
6208 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
6209 hucStitchDataBuf->TotalCommands = 1;
6210 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
6211
6212 HucInputCmdG12 hucInputCmd;
6213 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
6214
6215 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
6216 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
6217 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
6218 hucInputCmd.LengthOfTable = (uint8_t)GetNumTilesInFrame();
6219 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;
6220
6221 PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource;
6222
6223 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6224 m_osInterface,
6225 presSrc,
6226 false,
6227 false));
6228 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6229 m_osInterface,
6230 &m_resBitstreamBuffer,
6231 true,
6232 true));
6233
6234 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
6235 uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
6236 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
6237 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
6238
6239 hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
6240 hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
6241
6242 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
6243
6244 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
6245
6246 return eStatus;
6247 }
6248
SetDmemHuCVp9Prob()6249 MOS_STATUS CodechalVdencVp9StateG12::SetDmemHuCVp9Prob()
6250 {
6251 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6252
6253 CODECHAL_ENCODE_FUNCTION_ENTER;
6254
6255 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6256 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6257 lockFlagsWriteOnly.WriteOnly = 1;
6258
6259 HucProbDmem *dmem = nullptr;
6260 HucProbDmem *dmemTemp = nullptr;
6261 int currPass = GetCurrentPass();
6262 if (IsFirstPass())
6263 {
6264 for (auto i = 0; i < 3; i++)
6265 {
6266 dmem = (HucProbDmem *)m_osInterface->pfnLockResource(
6267 m_osInterface, &m_resHucProbDmemBuffer[i][m_currRecycledBufIdx], &lockFlagsWriteOnly);
6268 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
6269
6270 if (i == 0)
6271 {
6272 dmemTemp = dmem;
6273 }
6274
6275 MOS_SecureMemcpy(dmem, sizeof(HucProbDmem), m_probDmem, sizeof(HucProbDmem));
6276
6277 if (i != 0)
6278 {
6279 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucProbDmemBuffer[i][m_currRecycledBufIdx]));
6280 dmem = dmemTemp;
6281 }
6282 }
6283 }
6284 else
6285 {
6286 dmem = (HucProbDmem *)m_osInterface->pfnLockResource(
6287 m_osInterface, &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx], &lockFlagsWriteOnly);
6288 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
6289 }
6290
6291 // for BRC cases, HuC needs to be called on Pass 1
6292 if (m_superFrameHucPass)
6293 {
6294 dmem->HuCPassNum = CODECHAL_ENCODE_VP9_HUC_SUPERFRAME_PASS;
6295 }
6296 else
6297 {
6298 if (m_dysBrc)
6299 {
6300 //For BRC+Dynamic Scaling, we need to run as HUC pass 1 in the last pass since the curr_pass was changed to 0.
6301 dmem->HuCPassNum = currPass != 0;
6302 }
6303 else
6304 {
6305 //For Non-dynamic scaling BRC cases, HuC needs to run as HuC pass one only in last pass.
6306 dmem->HuCPassNum = ((m_vdencBrcEnabled && currPass == 1) ? 0 : (currPass != 0));
6307 }
6308 }
6309
6310 dmem->FrameWidth = m_oriFrameWidth;
6311 dmem->FrameHeight = m_oriFrameHeight;
6312
6313 for (auto i = 0; i < CODEC_VP9_MAX_SEGMENTS; i++)
6314 {
6315 dmem->SegmentRef[i] = (m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentReferenceEnabled == true) ? m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentReference : CODECHAL_ENCODE_VP9_REF_SEGMENT_DISABLED;
6316 dmem->SegmentSkip[i] = m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentSkipped;
6317 }
6318
6319 if (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME && m_currPass == 0)
6320 {
6321 for (auto i = 1; i < CODEC_VP9_NUM_CONTEXTS; i++)
6322 {
6323 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
6324 m_osInterface,
6325 &m_resProbBuffer[i],
6326 &lockFlagsWriteOnly);
6327
6328 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6329
6330 ContextBufferInit(data, 0);
6331 CtxBufDiffInit(data, 0);
6332
6333 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
6334 m_osInterface,
6335 &m_resProbBuffer[i]));
6336 }
6337 }
6338
6339 // in multipasses, only delta seg qp (SegCodeAbs = 0) is supported, confirmed by the arch team
6340 dmem->SegCodeAbs = 0;
6341 dmem->SegTemporalUpdate = m_vp9PicParams->PicFlags.fields.segmentation_temporal_update;
6342 dmem->LastRefIndex = m_vp9PicParams->RefFlags.fields.LastRefIdx;
6343 dmem->GoldenRefIndex = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
6344 dmem->AltRefIndex = m_vp9PicParams->RefFlags.fields.AltRefIdx;
6345 dmem->RefreshFrameFlags = m_vp9PicParams->RefFlags.fields.refresh_frame_flags;
6346 dmem->RefFrameFlags = m_refFrameFlags;
6347 dmem->ContextFrameTypes = m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx];
6348 dmem->FrameToShow = GetReferenceBufferSlotIndex(dmem->RefreshFrameFlags);
6349
6350 dmem->FrameCtrl.FrameType = m_vp9PicParams->PicFlags.fields.frame_type;
6351 dmem->FrameCtrl.ShowFrame = m_vp9PicParams->PicFlags.fields.show_frame;
6352 dmem->FrameCtrl.ErrorResilientMode = m_vp9PicParams->PicFlags.fields.error_resilient_mode;
6353 dmem->FrameCtrl.IntraOnly = m_vp9PicParams->PicFlags.fields.intra_only;
6354 dmem->FrameCtrl.ContextReset = m_vp9PicParams->PicFlags.fields.reset_frame_context;
6355 dmem->FrameCtrl.LastRefFrameBias = m_vp9PicParams->RefFlags.fields.LastRefSignBias;
6356 dmem->FrameCtrl.GoldenRefFrameBias = m_vp9PicParams->RefFlags.fields.GoldenRefSignBias;
6357 dmem->FrameCtrl.AltRefFrameBias = m_vp9PicParams->RefFlags.fields.AltRefSignBias;
6358 dmem->FrameCtrl.AllowHighPrecisionMv = m_vp9PicParams->PicFlags.fields.allow_high_precision_mv;
6359 dmem->FrameCtrl.McompFilterMode = m_vp9PicParams->PicFlags.fields.mcomp_filter_type;
6360 dmem->FrameCtrl.TxMode = m_txMode;
6361 dmem->FrameCtrl.RefreshFrameContext = m_vp9PicParams->PicFlags.fields.refresh_frame_context;
6362 dmem->FrameCtrl.FrameParallelDecode = m_vp9PicParams->PicFlags.fields.frame_parallel_decoding_mode;
6363 dmem->FrameCtrl.CompPredMode = m_vp9PicParams->PicFlags.fields.comp_prediction_mode;
6364 dmem->FrameCtrl.FrameContextIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
6365 dmem->FrameCtrl.SharpnessLevel = m_vp9PicParams->sharpness_level;
6366 dmem->FrameCtrl.SegOn = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
6367 dmem->FrameCtrl.SegMapUpdate = m_vp9PicParams->PicFlags.fields.segmentation_update_map;
6368 dmem->FrameCtrl.SegUpdateData = m_vp9PicParams->PicFlags.fields.seg_update_data;
6369 dmem->StreamInSegEnable = (uint8_t)m_segmentMapProvided;
6370 dmem->StreamInEnable = (uint8_t)m_segmentMapProvided; // Currently unused, if used may || with HME enabled
6371
6372 dmem->FrameCtrl.log2TileRows = m_vp9PicParams->log2_tile_rows;
6373 dmem->FrameCtrl.log2TileCols = m_vp9PicParams->log2_tile_columns;
6374
6375 dmem->PrevFrameInfo = m_prevFrameInfo;
6376
6377 // For DyS CQP or BRC case, there is no Repak on last pass. So Repak flag is disabled here.
6378 // We also disable repak pass in TU7 speed mode usage for performance reasons.
6379 dmem->RePak = (m_numPasses > 0 && IsLastPass() && !(m_dysCqp || m_dysBrc) && (m_vp9SeqParams->TargetUsage != TU_PERFORMANCE));
6380
6381 if (dmem->RePak && m_adaptiveRepakSupported)
6382 {
6383 MOS_SecureMemcpy(dmem->RePakThreshold, sizeof(uint32_t) * CODEC_VP9_QINDEX_RANGE, m_rePakThreshold, sizeof(uint32_t) * CODEC_VP9_QINDEX_RANGE);
6384 }
6385
6386 dmem->LFLevelBitOffset = m_vp9PicParams->BitOffsetForLFLevel;
6387 dmem->QIndexBitOffset = m_vp9PicParams->BitOffsetForQIndex;
6388 dmem->SegBitOffset = m_vp9PicParams->BitOffsetForSegmentation + 1; // exclude segment_enable bit
6389 dmem->SegLengthInBits = m_vp9PicParams->BitSizeForSegmentation - 1; // exclude segment_enable bit
6390 dmem->UnCompHdrTotalLengthInBits = m_vp9PicParams->BitOffsetForFirstPartitionSize + 16;
6391 dmem->PicStateOffset = m_hucPicStateOffset;
6392 dmem->SLBBSize = m_hucSlbbSize;
6393 dmem->IVFHeaderSize = (m_frameNum == 0) ? 44 : 12;
6394 dmem->VDEncImgStateOffset = m_slbbImgStateOffset;
6395 dmem->PakOnlyEnable = ((dmem->RePak) && m_vdencPakonlyMultipassEnabled) ? 1 : 0;
6396
6397 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx]));
6398
6399 return eStatus;
6400 }
6401
InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)6402 MOS_STATUS CodechalVdencVp9StateG12::InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)
6403 {
6404 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6405 CODECHAL_ENCODE_FUNCTION_ENTER;
6406
6407 MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS miEnhancedConditionalBatchBufferEndParams;
6408
6409 MOS_ZeroMemory(
6410 &miEnhancedConditionalBatchBufferEndParams,
6411 sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
6412
6413 miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_resHucErrorStatusBuffer;
6414
6415 miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
6416 miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = false;
6417 miEnhancedConditionalBatchBufferEndParams.compareOperation = MAD_EQUAL_IDD;
6418 miEnhancedConditionalBatchBufferEndParams.bDisableCompareMask = false;
6419
6420 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
6421 cmdBuffer,
6422 (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)));
6423
6424 return eStatus;
6425 }
6426