1 /*
2 * Copyright (c) 2017-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 
23 //!
24 //! \file     codechal_vdenc_vp9_g12.cpp
25 //! \brief    VP9 VDENC encoder for GEN12.
26 //!
27 
28 #include "codechal_vdenc_vp9_g12.h"
29 #include "codechal_kernel_header_g12.h"
30 #include "codechal_kernel_hme_g12.h"
31 #include "codeckrnheader.h"
32 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
33 #include "igcodeckrn_g12.h"
34 #endif
35 #include "mhw_vdbox_hcp_g12_X.h"
36 #include "mhw_vdbox_vdenc_g12_X.h"
37 #include "mhw_vdbox_g12_X.h"
38 #include "mhw_vdbox_vdenc_hwcmd_g12_X.h"
39 #include "mhw_mi_g12_X.h"
40 #include "mhw_render_g12_X.h"
41 #include "codechal_mmc_encode_vp9_g12.h"
42 #include "codechal_hw_g12_X.h"
43 
44 #define MAXPATH 512
45 
46 const uint32_t CodechalVdencVp9StateG12::meCurbeInit[48] =
47 {
48     0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
49     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
50     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
51     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
52     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
53     0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
54 };
55 
UserFeatureKeyReport()56 MOS_STATUS CodechalVdencVp9StateG12::UserFeatureKeyReport()
57 {
58     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
59 
60     CODECHAL_ENCODE_FUNCTION_ENTER;
61 
62     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::UserFeatureKeyReport());
63 
64 #if (_DEBUG || _RELEASE_INTERNAL)
65     CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
66     CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH, m_enableTileStitchByHW, m_osInterface->pOsContext);
67     CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_SINGLE_PASS_DYS_ENABLE_ID, m_singlePassDys, m_osInterface->pOsContext);
68 #endif
69 
70     return eStatus;
71 }
72 
CodechalVdencVp9StateG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)73 CodechalVdencVp9StateG12::CodechalVdencVp9StateG12(
74     CodechalHwInterface* hwInterface,
75     CodechalDebugInterface* debugInterface,
76     PCODECHAL_STANDARD_INFO standardInfo)
77     :CodechalVdencVp9State(hwInterface, debugInterface, standardInfo)
78 {
79     m_useCommonKernel = true;
80     m_isTilingSupported = true;
81 
82     // We need the DYS kernel inside AllVP9Enc_CNLA0, for SHME we need kernels inside
83     // HME_DS_SCOREBOARD_KERNEL, so we need to allocate enough size in ISH for both.
84 
85     uint8_t* binary = nullptr;
86     m_scalabilityState = nullptr;
87     uint32_t combinedKernelSize = 0;
88 
89     pfnGetKernelHeaderAndSize = GetCommonKernelHeaderAndSizeG12;
90 
91     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_VP9_NUM_SYNC_TAGS;
92     m_hwInterface->GetStateHeapSettings()->dwDshSize     = CODECHAL_ENCODE_VP9_INIT_DSH_SIZE;
93 
94     m_kuid = IDR_CODEC_AllVP9Enc;
95 
96     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
97 
98     if (m_useCommonKernel)
99     {
100         m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
101         eStatus = CodecHalGetKernelBinaryAndSize(
102 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
103             (uint8_t*)IGCODECKRN_G12,
104 #else
105             nullptr,
106 #endif
107             m_kuidCommon,
108             &binary,
109             &combinedKernelSize);
110         CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
111 
112         m_hwInterface->GetStateHeapSettings()->dwIshSize +=
113             MOS_ALIGN_CEIL(combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
114     }
115 
116     // Initialize to 0
117     MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
118     MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
119     MOS_ZeroMemory(&m_hcpScalabilitySyncBuffer, sizeof(m_hcpScalabilitySyncBuffer));
120 
121     for (auto i = 0; i < m_numUncompressedSurface; i++)
122     {
123         MOS_ZeroMemory(&m_tileRecordBuffer[i].sResource, sizeof(m_tileRecordBuffer[i].sResource));
124     }
125     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
126     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
127     Mos_SetVirtualEngineSupported(m_osInterface, true);
128     for (auto i = 0; i < m_numUncompressedSurface; i++)
129     {
130         MOS_ZeroMemory(&m_tileStatsPakIntegrationBuffer[i].sResource, sizeof(m_tileStatsPakIntegrationBuffer[i].sResource));
131     }
132     MOS_ZeroMemory(&m_frameStatsPakIntegrationBuffer.sResource, sizeof(m_frameStatsPakIntegrationBuffer.sResource));
133     for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
134     {
135         for (auto j = 0; j < m_brcMaxNumPasses; j++)
136         {
137             MOS_ZeroMemory(&m_hucPakIntDmemBuffer[i][j], sizeof(m_hucPakIntDmemBuffer[i][j]));
138         }
139     }
140     MOS_ZeroMemory(&m_hucPakIntDummyBuffer, sizeof(m_hucPakIntDummyBuffer));
141     MOS_ZeroMemory(&m_hucPakIntBrcDataBuffer, sizeof(m_hucPakIntBrcDataBuffer));
142     for (auto i = 0; i < m_maxNumPipes; i++)
143     {
144         MOS_ZeroMemory(&m_stitchWaitSemaphoreMem[i], sizeof(m_stitchWaitSemaphoreMem[i]));
145         MOS_ZeroMemory(&m_hucDoneSemaphoreMem[i], sizeof(m_hucDoneSemaphoreMem[i]));
146     }
147     MOS_ZeroMemory(&m_pakIntDoneSemaphoreMem, sizeof(m_pakIntDoneSemaphoreMem));
148 }
149 
~CodechalVdencVp9StateG12()150 CodechalVdencVp9StateG12::~CodechalVdencVp9StateG12()
151 {
152     CODECHAL_ENCODE_FUNCTION_ENTER;
153 
154     if (m_scalabilityState)
155     {
156         MOS_FreeMemAndSetNull(m_scalabilityState);
157     }
158     //Note: virtual engine interface destroy is done in MOS layer
159     return;
160 }
161 
162 // This is used only for DynamicScaling
ExecuteDysPictureLevel()163 MOS_STATUS CodechalVdencVp9StateG12::ExecuteDysPictureLevel()
164 {
165     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
166 
167     CODECHAL_ENCODE_FUNCTION_ENTER;
168 
169     CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
170     auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
171     CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
172     PerfTagSetting perfTag;
173     perfTag.Value = 0;
174     perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
175     perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
176     perfTag.PictureCodingType = m_pictureCodingType;
177     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
178 
179     // We only need to update Huc PAK insert object and picture state for the first pass
180     if (IsFirstPass())
181     {
182         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
183         CODECHAL_ENCODE_CHK_STATUS_RETURN(PakConstructPicStateBatchBuf(
184             &m_brcBuffers.resPicStateBrcWriteHucReadBuffer));
185 
186     }
187 
188     MOS_COMMAND_BUFFER cmdBuffer;
189     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
190 
191     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
192     {
193         bool requestFrameTracking = false;
194         MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
195         MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
196         forceWakeupParams.bMFXPowerWellControl = true;
197         forceWakeupParams.bMFXPowerWellControlMask = true;
198         forceWakeupParams.bHEVCPowerWellControl = true;
199         forceWakeupParams.bHEVCPowerWellControlMask = true;
200         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(&cmdBuffer, &forceWakeupParams));
201         // Send command buffer header at the beginning (OS dependent)
202         // frame tracking tag is only added in the last command buffer header
203         requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
204         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
205     }
206 
207     // Making sure ImgStatusCtrl is zeroed out before first PAK pass. HW supposedly does this before start of each frame. Remove this after confirming.
208     if (m_currPass == 0)
209     {
210         MHW_MI_LOAD_REGISTER_IMM_PARAMS miLoadRegImmParams;
211         MOS_ZeroMemory(&miLoadRegImmParams, sizeof(miLoadRegImmParams));
212         miLoadRegImmParams.dwData = 0;
213         miLoadRegImmParams.dwRegister = mmioRegisters->hcpVp9EncImageStatusCtrlRegOffset;
214         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiLoadRegisterImmCmd(&cmdBuffer, &miLoadRegImmParams));
215     }
216 
217     // Read Image status before running PAK, to get correct cumulative delta applied for final pass.
218     if (m_currPass != m_numPasses)        // Don't read it for Repak
219     {
220         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(&cmdBuffer));
221     }
222 
223     //updating the numberofpakpasses in encode staus buffer. should not update for repak.
224     if (m_currPass < m_numPasses)
225     {
226         uint32_t offset =
227             (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
228             m_encodeStatusBuf.dwNumPassesOffset +
229             sizeof(uint32_t) * 2; // encode status doesn't start until 3rd DW
230 
231         MHW_MI_STORE_DATA_PARAMS storeDataParams;
232         MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
233         storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
234         storeDataParams.dwResourceOffset = offset;
235         storeDataParams.dwValue = m_currPass + 1;
236         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
237     }
238 
239     if (!m_currPass && m_osInterface->bTagResourceSync)
240     {
241         // This is a short term WA to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
242         // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
243         // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
244         // as long as Dec/VP/Enc won't depend on this PAK so soon.
245         PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
246         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
247             m_osInterface,
248             globalGpuContextSyncTagBuffer));
249         CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
250 
251         uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
252         MHW_MI_STORE_DATA_PARAMS params;
253         params.pOsResource = globalGpuContextSyncTagBuffer;
254         params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
255         params.dwValue = (value > 0) ? (value - 1) : 0;
256         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(&cmdBuffer, &params));
257     }
258 
259     CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
260 
261     //Send VD_CONTROL_STATE Pipe Initialization
262     MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
263     MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
264     vdCtrlParam.initialization = true;
265     MhwMiInterfaceG12* miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
266     CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
267 
268     // set HCP_PIPE_MODE_SELECT values
269     PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
270     pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
271     CODECHAL_ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
272 
273     SetHcpPipeModeSelectParams(*pipeModeSelectParams);
274 
275     pipeModeSelectParams->Mode                   = m_mode;
276     pipeModeSelectParams->bStreamOutEnabled      = false;
277     pipeModeSelectParams->bVdencEnabled          = false;
278     pipeModeSelectParams->ChromaType             = m_vp9SeqParams->SeqFlags.fields.EncodedFormat;
279     pipeModeSelectParams->bDynamicScalingEnabled = m_dysRefFrameFlags && !m_dysVdencMultiPassEnabled;
280 
281     eStatus = m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams);
282     m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
283     CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
284 
285     // set HCP_SURFACE_STATE values
286     MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
287     for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
288     {
289         MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
290         surfaceParams[i].Mode = m_mode;
291         surfaceParams[i].ucSurfaceStateId = i;
292         surfaceParams[i].ChromaType = m_outputChromaFormat;
293 
294         switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
295         {
296         case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
297         {
298             surfaceParams[i].ucBitDepthChromaMinus8 = 2;
299             surfaceParams[i].ucBitDepthLumaMinus8 = 2;
300             break;
301         }
302         default:
303         {
304             surfaceParams[i].ucBitDepthChromaMinus8 = 0;
305             surfaceParams[i].ucBitDepthLumaMinus8 = 0;
306             break;
307         }
308         }
309     }
310 
311     // For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
312     PMOS_SURFACE refSurface[3];
313     for (auto i = 0; i < 3; i++)
314     {
315         refSurface[i] = nullptr;
316     }
317 
318     if (m_pictureCodingType != I_TYPE)
319     {
320         uint8_t refPicIndex;
321         if (m_refFrameFlags & 0x01)
322         {
323             refPicIndex = m_vp9PicParams->RefFlags.fields.LastRefIdx;
324 
325             CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])));
326             refSurface[0] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
327         }
328 
329         if (m_refFrameFlags & 0x02)
330         {
331             refPicIndex = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
332 
333             CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])));
334             refSurface[1] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
335         }
336 
337         if (m_refFrameFlags & 0x04)
338         {
339             refPicIndex = m_vp9PicParams->RefFlags.fields.AltRefIdx;
340 
341             CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])))
342             refSurface[2] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
343         }
344 
345         if (!refSurface[0])
346         {
347             refSurface[0] = (refSurface[1]) ? refSurface[1] : refSurface[2];
348         }
349 
350         if (!refSurface[1])
351         {
352             refSurface[1] = (refSurface[0]) ? refSurface[0] : refSurface[2];
353         }
354 
355         if (!refSurface[2])
356         {
357             refSurface[2] = (refSurface[0]) ? refSurface[0] : refSurface[1];
358         }
359 
360         // Program Surface params for Last/Golen/Alt Reference surface
361         surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].psSurface = refSurface[0];
362         surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].psSurface = refSurface[1];
363         surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].psSurface = refSurface[2];
364 
365         surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[0] ? refSurface[0]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
366         surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[1] ? refSurface[1]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
367         surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[2] ? refSurface[2]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
368     }
369 
370     // recon
371     surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID].psSurface = &m_reconSurface;
372     surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID].dwReconSurfHeight = m_rawSurfaceToPak->dwHeight;
373 
374     // raw
375     surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].psSurface = m_rawSurfaceToPak;
376     surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].bDisplayFormatSwizzle = m_vp9SeqParams->SeqFlags.fields.DisplayFormatSwizzle;
377     surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].dwActualWidth = MOS_ALIGN_CEIL(m_oriFrameWidth, CODEC_VP9_MIN_BLOCK_WIDTH);
378     surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].dwActualHeight = MOS_ALIGN_CEIL(m_oriFrameHeight, CODEC_VP9_MIN_BLOCK_WIDTH);
379 
380     // Decoded picture
381 #ifdef _MMC_SUPPORTED
382     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
383     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
384 #endif
385     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
386 
387     // Source input
388 #ifdef _MMC_SUPPORTED
389     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
390     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
391 #endif
392     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
393 
394     if (m_pictureCodingType != I_TYPE)
395     {
396 #ifdef _MMC_SUPPORTED
397         //Get each reference surface state and be recorded by skipMask if current surface state is mmc disabled
398         //In VP9 mode, Bit 8is (here is bit0 in skipMask ) for Previous Reference;
399         //Bit 9is (here is bit1 in skipMask ) for Golden Reference and Bit 10is (here is bit2 in skipMask ) for Alterante Reference;
400         //Bits11-15are unused and should be programmed to 0 (skipped)
401         uint8_t skipMask = 0xf8;
402         for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
403         {
404             CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
405             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[i]));
406             if (surfaceParams[i].mmcState == MOS_MEMCOMP_DISABLED)
407             {
408                 skipMask |= (1 << (i - 2));
409             }
410         }
411         CODECHAL_ENCODE_NORMALMESSAGE("MMC skip mask is %d\n", skipMask);
412         for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
413         {
414             //Set each ref surface state as MOS_MEMCOMP_MC to satisfy MmcEnable in AddHcpSurfaceCmd
415             //Because each ref surface state should be programmed as the same
416             //The actual mmc state is recorded by skipMask and set each ref surface too
417             surfaceParams[i].mmcState = MOS_MEMCOMP_MC;
418             surfaceParams[i].mmcSkipMask = skipMask;
419         }
420 #endif
421         for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
422         {
423             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[i]));
424         }
425     }
426 
427     // set HCP_PIPE_BUF_ADDR_STATE values
428     PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
429     pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
430     if (pipeBufAddrParams)
431     {
432         auto delete_func = [&]()
433         {
434             MOS_Delete(pipeBufAddrParams);
435             pipeBufAddrParams = nullptr;
436         };
437 
438         pipeBufAddrParams->Mode = m_mode;
439         pipeBufAddrParams->psPreDeblockSurface = &m_reconSurface;
440         pipeBufAddrParams->psPostDeblockSurface = &m_reconSurface;
441         pipeBufAddrParams->psRawSurface = m_rawSurfaceToPak;
442 
443         pipeBufAddrParams->presStreamOutBuffer = nullptr;
444         pipeBufAddrParams->presMfdDeblockingFilterRowStoreScratchBuffer =
445             &m_resDeblockingFilterLineBuffer;
446 
447         pipeBufAddrParams->presDeblockingFilterTileRowStoreScratchBuffer =
448             &m_resDeblockingFilterTileLineBuffer;
449 
450         pipeBufAddrParams->presDeblockingFilterColumnRowStoreScratchBuffer =
451             &m_resDeblockingFilterTileColumnBuffer;
452 
453         pipeBufAddrParams->presMetadataLineBuffer       = &m_resMetadataLineBuffer;
454         pipeBufAddrParams->presMetadataTileLineBuffer   = &m_resMetadataTileLineBuffer;
455         pipeBufAddrParams->presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
456         pipeBufAddrParams->presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
457 
458 #ifdef _MMC_SUPPORTED
459         CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
460         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
461 #endif
462 
463         //Huc is disabled for ref frame scaling, use input region
464         uint8_t frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
465         CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
466         pipeBufAddrParams->presVp9ProbBuffer      = &m_resProbBuffer[frameCtxIdx];
467         pipeBufAddrParams->presVp9SegmentIdBuffer = &m_resSegmentIdBuffer;
468 
469         if (m_pictureCodingType != I_TYPE)
470         {
471             for (auto i = 0; i < 3; i++)
472             {
473                 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(refSurface[i], delete_func);
474 
475                 pipeBufAddrParams->presReferences[i] = &refSurface[i]->OsResource;
476             }
477 
478             pipeBufAddrParams->presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
479         }
480 #ifdef _MMC_SUPPORTED
481         CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
482         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
483 #endif
484         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
485 
486         MOS_Delete(pipeBufAddrParams);
487     }
488 
489     // set HCP_IND_OBJ_BASE_ADDR_STATE values
490     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
491     MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
492     indObjBaseAddrParams.Mode = m_mode;
493     indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
494     indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
495     indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
496     indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
497     indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
498     indObjBaseAddrParams.presProbabilityDeltaBuffer   = &m_resProbabilityDeltaBuffer;
499     indObjBaseAddrParams.dwProbabilityDeltaSize = 29 * CODECHAL_CACHELINE_SIZE;
500     indObjBaseAddrParams.presCompressedHeaderBuffer   = &m_resCompressedHeaderBuffer;
501     indObjBaseAddrParams.dwCompressedHeaderSize = 32 * CODECHAL_CACHELINE_SIZE;
502     indObjBaseAddrParams.presProbabilityCounterBuffer = &m_resProbabilityCounterBuffer;
503     indObjBaseAddrParams.dwProbabilityCounterSize = 193 * CODECHAL_CACHELINE_SIZE;
504     indObjBaseAddrParams.presTileRecordBuffer         = &m_resTileRecordStrmOutBuffer;
505     indObjBaseAddrParams.dwTileRecordSize = m_picSizeInSb * CODECHAL_CACHELINE_SIZE;
506     indObjBaseAddrParams.presCuStatsBuffer            = &m_resCuStatsStrmOutBuffer;
507     indObjBaseAddrParams.dwCuStatsSize = MOS_ALIGN_CEIL(m_picSizeInSb * 64 * 8, CODECHAL_CACHELINE_SIZE);
508     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
509 
510     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
511     MHW_BATCH_BUFFER secondLevelBatchBuffer;
512     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
513     secondLevelBatchBuffer.dwOffset = (m_numPasses > 0) ? CODECHAL_ENCODE_VP9_PIC_STATE_BUFFER_SIZE_PER_PASS * (m_currPass % m_numPasses) : 0;
514     secondLevelBatchBuffer.bSecondLevel = true;
515     //As Huc is disabled for Ref frame scaling, use the ReadBuffer
516     secondLevelBatchBuffer.OsResource = m_brcBuffers.resPicStateBrcWriteHucReadBuffer;
517     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
518         &cmdBuffer,
519         &secondLevelBatchBuffer));
520 
521     // HCP_VP9_SEGMENT_STATE
522     uint8_t segmentCount = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
523 
524     MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
525     MOS_ZeroMemory(&segmentState, sizeof(segmentState));
526     segmentState.Mode = m_mode;
527     segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
528     segmentState.ucQPIndexLumaAC         = m_vp9PicParams->LumaACQIndex;
529 
530     // For BRC with segmentation, seg state commands for PAK are copied from BRC seg state buffer
531     // For CQP or BRC with no segmentation, PAK still needs seg state commands and driver prepares those commands.
532     segmentState.pbSegStateBufferPtr = nullptr; // Set this to nullptr, for commands to be prepared by driver
533     segmentState.pcucLfQpLookup = &LF_VALUE_QP_LOOKUP[0];
534     for (uint8_t i = 0; i < segmentCount; i++)
535     {
536         segmentState.ucCurrentSegmentId = i;
537         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpVp9SegmentStateCmd(&cmdBuffer, nullptr, &segmentState));
538     }
539 
540     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
541 
542     return eStatus;
543 }
ExecuteDysSliceLevel()544 MOS_STATUS CodechalVdencVp9StateG12::ExecuteDysSliceLevel()
545 {
546     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
547 
548     CODECHAL_ENCODE_FUNCTION_ENTER;
549 
550     CODECHAL_ENCODE_CHK_NULL_RETURN(m_nalUnitParams);
551 
552     MOS_COMMAND_BUFFER cmdBuffer;
553     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
554 
555     MHW_BATCH_BUFFER secondLevelBatchBuffer;
556     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
557     secondLevelBatchBuffer.dwOffset = 0;
558     secondLevelBatchBuffer.bSecondLevel = true;
559     if (!m_hucEnabled)
560     {
561         secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
562     }
563     else
564     {
565         secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
566     }
567     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
568         &cmdBuffer,
569         &secondLevelBatchBuffer));
570 
571     // Setup Tile level PAK commands
572     CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
573     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9StateG12::SetTileData());
574     CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[0]));
575 
576     //Reset Frame Tracking header for this submission as this is not the last submission
577     bool isFrameTrackingHeaderSet = cmdBuffer.Attributes.bEnableMediaFrameTracking;
578     cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
579 
580     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
581     secondLevelBatchBuffer.OsResource = m_resMbCodeSurface;
582     secondLevelBatchBuffer.dwOffset = 0;
583     secondLevelBatchBuffer.bSecondLevel = true;
584     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &secondLevelBatchBuffer));
585 
586     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
587     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
588     // MFXPipeDone should not be set for tail insertion
589     vdPipelineFlushParams.Flags.bWaitDoneMFX =
590         (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
591     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
592     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
593     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
594 
595     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
596 
597     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
598     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
599     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
600 
601     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
602 
603     if (!m_scalableMode)
604     {
605         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
606     }
607 
608     if (m_currPass >= (m_numPasses - 1))    // Last pass and the one before last
609     {
610         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
611     }
612 
613     std::string currPassName = "PAK_PASS_DYS" + std::to_string((int)m_currPass);
614     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
615         &cmdBuffer,
616         CODECHAL_NUM_MEDIA_STATES,
617         currPassName.data())));
618 
619     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
620 
621     if (m_waitForEnc &&
622         !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
623     {
624         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
625         syncParams.GpuContext = m_videoContext;
626         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
627 
628         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
629         m_waitForEnc = false;
630     }
631 
632     if (m_currPass >= (m_numPasses - 1))    // Last pass and the one before last
633     {
634         bool renderFlags;
635 
636         renderFlags = m_videoContextUsesNullHw;
637 
638         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
639     }
640 
641     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
642     cmdBuffer.Attributes.bEnableMediaFrameTracking = isFrameTrackingHeaderSet;
643     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
644 
645     CODECHAL_DEBUG_TOOL(
646         if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
647             //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
648             //m_debugInterface->DumpBuffer(
649             //    (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
650             //    CodechalDbgAttr::attrOutput,
651             //    "SegMap_Out",
652             //    CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
653             //    0,
654             //    CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
655         } if (m_mmcState) {
656             m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
657         });
658 
659     return eStatus;
660 }
661 
InitKernelStates()662 MOS_STATUS CodechalVdencVp9StateG12::InitKernelStates()
663 {
664     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
665 
666     CODECHAL_ENCODE_FUNCTION_ENTER;
667 
668 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
669     m_kernelBase = (uint8_t*)IGCODECKRN_G12;
670 #endif
671 
672     // KUID for HME + DS + SW SCOREBOARD Kernel
673     m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
674 
675 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
676     // DYS
677     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDys());
678 
679     // SHME
680     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
681 #endif
682 
683     return eStatus;
684 }
685 
GetMaxBtCount()686 uint32_t CodechalVdencVp9StateG12::GetMaxBtCount()
687 {
688     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
689 
690     CODECHAL_ENCODE_FUNCTION_ENTER;
691     uint32_t maxBtCount = 0;
692 
693 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
694     if (m_hmeSupported)
695     {
696         uint32_t scalingBtCount = 0;
697         uint32_t numKernelsToLoad = m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
698         uint16_t btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
699         for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
700         {
701             scalingBtCount += MOS_ALIGN_CEIL(
702                 m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
703                 btIdxAlignment);
704         }
705         uint32_t meBtCount = 0;
706         // 4xME + Streamin kernel btcount
707         meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_VDENC].KernelParams.iBTCount, btIdxAlignment);
708 
709         //16xME streamin kernel count added to ME count and scaling kernel 16x added to scaling count
710         if (m_16xMeSupported)
711         {
712             meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_P].KernelParams.iBTCount, btIdxAlignment);
713             for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
714             {
715                 scalingBtCount += MOS_ALIGN_CEIL(
716                     m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
717                     btIdxAlignment);
718             }
719         }
720         maxBtCount = scalingBtCount + meBtCount;
721     }
722 #endif
723 
724     return maxBtCount;
725 }
726 
727 // DYS kernel state init
InitKernelStateDys()728 MOS_STATUS CodechalVdencVp9StateG12::InitKernelStateDys()
729 {
730     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
731 
732     CODECHAL_ENCODE_FUNCTION_ENTER;
733 
734 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
735     uint32_t combinedKernelSize = 0;
736     uint8_t* binary = nullptr;
737     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
738         (uint8_t*)IGCODECKRN_G12,
739         m_kuidCommon,
740         &binary,
741         &combinedKernelSize));
742 
743     uint32_t kernelSize = combinedKernelSize;
744     CODECHAL_KERNEL_HEADER currKrnHeader;
745     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
746         binary,
747         ENC_DYS,
748         0,
749         &currKrnHeader,
750         &kernelSize));
751 
752     PMHW_KERNEL_STATE kernelState = &m_dysKernelState;
753     kernelState->KernelParams.iBTCount = MOS_ALIGN_CEIL(m_dysNumSurfaces, m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
754     kernelState->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
755     kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(m_dysStaticDataSize, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
756     kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
757     kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
758     kernelState->KernelParams.iIdCount = 1;
759     kernelState->KernelParams.iSamplerCount = 1;
760     kernelState->KernelParams.iSamplerLength = m_stateHeapInterface->pStateHeapInterface->GetSizeofSamplerStateAvs();
761 
762     kernelState->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
763     kernelState->dwSamplerOffset = MOS_ALIGN_CEIL(kernelState->dwCurbeOffset + kernelState->KernelParams.iCurbeLength, MHW_SAMPLER_STATE_AVS_ALIGN_G9);
764     kernelState->KernelParams.pBinary =
765         binary +
766         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
767     kernelState->KernelParams.iSize = kernelSize;
768     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
769         m_stateHeapInterface,
770         kernelState->KernelParams.iBTCount,
771         &kernelState->dwSshSize,
772         &kernelState->dwBindingTableSize));
773 
774     m_dysDshSize = kernelState->dwSamplerOffset +
775         MOS_ALIGN_CEIL(kernelState->KernelParams.iSamplerLength * kernelState->KernelParams.iSamplerCount, MHW_SAMPLER_STATE_AVS_ALIGN);
776 
777     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelState));
778 #endif
779 
780     return eStatus;
781 }
782 
SetupSegmentationStreamIn()783 MOS_STATUS CodechalVdencVp9StateG12::SetupSegmentationStreamIn()
784 {
785     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
786 
787     CODECHAL_ENCODE_FUNCTION_ENTER;
788 
789     if (!m_segmentMapProvided && !m_hmeEnabled) // If we're not going to use the streamin surface leave now
790     {
791         return eStatus;
792     }
793 
794     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
795     MOS_LOCK_PARAMS lockFlagsWriteOnly;
796     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
797     lockFlagsWriteOnly.WriteOnly = 1;
798 
799     MOS_LOCK_PARAMS lockFlagsReadOnly;
800     MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
801     lockFlagsReadOnly.ReadOnly = 1;
802 
803     mhw_vdbox_vdenc_g12_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *
804         streamIn = (mhw_vdbox_vdenc_g12_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *)m_osInterface->pfnLockResource(
805             m_osInterface,
806             &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
807             &lockFlagsWriteOnly);
808     CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
809 
810     // align to cache line size is OK since streamin state is padded to cacheline size - HW uses cacheline size to read, not command size
811     uint32_t blockWidth = MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
812     uint32_t blockHeight = MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
813     uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
814     MOS_ZeroMemory(streamIn, streamInSize);
815 
816     // If segment map isn't provided then we unlock surface and exit function here.
817     // Reason why check isn't done before function call is to take advantage of the fact that
818     // we need the surface locked here if seg map is provided and we want it 0'd either way.
819     // This saves us from doing 2 locks on this buffer per frame.
820     if (!m_segmentMapProvided)
821     {
822         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
823             m_osInterface,
824             &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
825         return eStatus;
826     }
827 
828     char *data = (char *)m_osInterface->pfnLockResource(
829         m_osInterface,
830         &m_mbSegmentMapSurface.OsResource,
831         &lockFlagsReadOnly);
832     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
833 
834     // Rasterization is done within a tile and then for each tile within the frame in raster order.
835     if (m_isTilingSupported)
836     {
837         uint32_t numTileColumns          = (1 << m_vp9PicParams->log2_tile_columns);
838         uint32_t numTileRows             = (1 << m_vp9PicParams->log2_tile_rows);
839         uint32_t numTiles = numTileColumns * numTileRows;
840         uint32_t currTileStartX64Aligned = 0, dwCurrTileStartY64Aligned = 0;         //Set tile Y coordinate 0
841         m_32BlocksRasterized = 0;   //Count of rasterized blocks for this frame
842         uint32_t tileX = 0;
843         uint32_t tileY = 0;
844         for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
845         {
846             tileX = tileIdx % numTileColumns; //Current tile column position
847             tileY = tileIdx / numTileColumns; //Current tile row position
848 
849             currTileStartX64Aligned   = ((tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
850             dwCurrTileStartY64Aligned = ((tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
851 
852             uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) *
853                                               CODEC_VP9_SUPER_BLOCK_WIDTH) -
854                                           currTileStartX64Aligned;
855 
856             uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) *
857                                                CODEC_VP9_SUPER_BLOCK_HEIGHT) -
858                                            dwCurrTileStartY64Aligned;
859 
860             // last tile col raw width and raw height not necessarily 64 aligned, use this length to duplicate values from segmap for empty padding blocks in last tiles.
861             uint32_t lastTileColWidth = (tileX == (numTileColumns - 1)) ? (m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
862             uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (m_frameHeight - dwCurrTileStartY64Aligned) : tileHeight64Aligned;
863 
864             uint32_t tileWidth = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
865             uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
866 
867             // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
868             // which was processed from this frame or previous,
869             // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
870             if (!m_mapBuffer ||
871                 tileHeight != m_segStreamInHeight ||
872                 tileWidth != m_segStreamInWidth ||
873                 numTileColumns != m_tileParams[tileIdx].NumOfTileColumnsInFrame ||
874                 m_tileParams[tileIdx].NumOfTilesInFrame != numTiles)
875             {
876                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(tileHeight,
877                     tileWidth,
878                     dwCurrTileStartY64Aligned,
879                     currTileStartX64Aligned));
880             }
881             m_tileParams[tileIdx].NumOfTileColumnsInFrame = numTileColumns;
882             m_tileParams[tileIdx].NumOfTilesInFrame       = numTiles;
883         }
884     }
885 
886     uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
887     if (m_osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
888     {
889         //application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer
890         //driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
891         dwPitch = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
892     }
893     // set seg ID's of streamin states
894     for (uint32_t i = 0; i < blockHeight * blockWidth; ++i)
895     {
896         uint32_t addrOffset = CalculateBufferOffset(
897             m_mapBuffer[i],
898             m_frameWidth,
899             m_vp9PicParams->PicFlags.fields.seg_id_block_size,
900             dwPitch);
901         uint32_t segId = *(data + addrOffset);
902         streamIn[i].DW7.SegidEnable = 1;
903         streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
904 
905         // TU functions copied from there.
906         streamIn[i].DW0.Maxtusize = 3;
907         streamIn[i].DW0.Maxcusize = 3;
908 
909         // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
910         if ((i % 4) == 3 && m_pictureCodingType == P_TYPE)
911         {
912             if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
913                 streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
914                 streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
915             {
916                 streamIn[i - 3].DW0.Maxcusize = streamIn[i - 2].DW0.Maxcusize = streamIn[i - 1].DW0.Maxcusize = streamIn[i].DW0.Maxcusize = 2;
917             }
918         }
919 
920         streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
921 
922         switch (m_vp9SeqParams->TargetUsage)
923         {
924         case 1:     // Quality mode
925         case 2:
926         case 4:     // Normal mode
927             streamIn[i].DW6.Nummergecandidatecu8X8   = 1;
928             streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
929             streamIn[i].DW6.Nummergecandidatecu32X32 = 3;
930             streamIn[i].DW6.Nummergecandidatecu64X64 = 4;
931             break;
932         case 7:     // Speed mode
933             streamIn[i].DW0.Numimepredictors         = CODECHAL_VDENC_NUMIMEPREDICTORS_SPEED;
934             streamIn[i].DW6.Nummergecandidatecu8X8   = 0;
935             streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
936             streamIn[i].DW6.Nummergecandidatecu32X32 = 2;
937             streamIn[i].DW6.Nummergecandidatecu64X64 = 2;
938             break;
939         default:
940             MHW_ASSERTMESSAGE("Invalid TU provided!");
941             return MOS_STATUS_INVALID_PARAMETER;
942         }
943     }
944 
945     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
946         m_osInterface,
947         &m_mbSegmentMapSurface.OsResource));
948 
949     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
950         m_osInterface,
951         &m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
952 
953     return eStatus;
954 }
955 
GetSystemPipeNumberCommon()956 MOS_STATUS CodechalVdencVp9StateG12::GetSystemPipeNumberCommon()
957 {
958     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
959     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
960 
961     CODECHAL_ENCODE_FUNCTION_ENTER;
962 
963     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
964     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
965     statusKey = MOS_UserFeature_ReadValue_ID(
966         NULL,
967         __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
968         &userFeatureData,
969         m_osInterface->pOsContext);
970 
971     bool disableScalability = m_hwInterface->IsDisableScalability();
972     if (statusKey == MOS_STATUS_SUCCESS)
973     {
974         disableScalability = userFeatureData.i32Data ? true : false;
975     }
976 
977     MEDIA_SYSTEM_INFO *gtSystemInfo = m_gtSystemInfo;
978 
979     if (gtSystemInfo && disableScalability == false)
980     {
981         // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
982         m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
983     }
984     else
985     {
986         m_numVdbox = 1;
987     }
988 
989     return eStatus;
990 }
991 
InitKernelStateMe()992 MOS_STATUS CodechalVdencVp9StateG12::InitKernelStateMe()
993 {
994     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
995 
996     CODECHAL_ENCODE_FUNCTION_ENTER;
997 
998 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
999     CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderEngineInterface->GetHwCaps());
1000 
1001     uint32_t combinedKernelSize = 0;
1002     uint8_t* binary = nullptr;
1003     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
1004         m_kernelBase,
1005         m_kuidCommon,
1006         &binary,
1007         &combinedKernelSize));
1008 
1009     for (uint32_t krnStateIdx = 0; krnStateIdx < 2; krnStateIdx++)
1010     {
1011         CODECHAL_KERNEL_HEADER currKrnHeader;
1012         PMHW_KERNEL_STATE kernelStatePtr = &m_meKernelStates[krnStateIdx];
1013         uint32_t kernelSize = combinedKernelSize;
1014         EncOperation encOperation = (krnStateIdx > 0 && m_vdencEnabled) ?
1015             (m_useNonLegacyStreamin ? VDENC_STREAMIN_HEVC : VDENC_ME) : ENC_ME;
1016         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
1017             binary,
1018             encOperation,
1019             (encOperation != ENC_ME) ? 0 : krnStateIdx,
1020             &currKrnHeader,
1021             &kernelSize));
1022 
1023         kernelStatePtr->KernelParams.iBTCount = CODECHAL_ENCODE_ME_NUM_SURFACES_G12;
1024         kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
1025         kernelStatePtr->KernelParams.iCurbeLength = sizeof(MeCurbe);
1026         kernelStatePtr->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
1027         kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
1028         kernelStatePtr->KernelParams.iIdCount = 1;
1029 
1030         kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1031         kernelStatePtr->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1032         kernelStatePtr->KernelParams.iSize = kernelSize;
1033         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1034             m_stateHeapInterface,
1035             kernelStatePtr->KernelParams.iBTCount,
1036             &kernelStatePtr->dwSshSize,
1037             &kernelStatePtr->dwBindingTableSize));
1038 
1039         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
1040         if (m_noMeKernelForPFrame)
1041         {
1042             m_meKernelStates[1] = m_meKernelStates[0];
1043             break;
1044         }
1045     }
1046 
1047     // Until a better way can be found, maintain old binding table structures
1048     MeKernelBindingTable* bindingTable = &m_meBindingTable;
1049     bindingTable->dwMEMVDataSurface = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G12;
1050     bindingTable->dw16xMEMVDataSurface = CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G12;
1051     bindingTable->dw32xMEMVDataSurface = CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G12;
1052     bindingTable->dwMEDist = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G12;
1053     bindingTable->dwMEBRCDist = CODECHAL_ENCODE_ME_BRC_DISTORTION_G12;
1054     bindingTable->dwMECurrForFwdRef = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G12;
1055     bindingTable->dwMEFwdRefPicIdx[0] = CODECHAL_ENCODE_ME_FWD_REF_IDX0_G12;
1056     bindingTable->dwMEFwdRefPicIdx[1] = CODECHAL_ENCODE_ME_FWD_REF_IDX1_G12;
1057     bindingTable->dwMEFwdRefPicIdx[2] = CODECHAL_ENCODE_ME_FWD_REF_IDX2_G12;
1058     bindingTable->dwMEFwdRefPicIdx[3] = CODECHAL_ENCODE_ME_FWD_REF_IDX3_G12;
1059     bindingTable->dwMEFwdRefPicIdx[4] = CODECHAL_ENCODE_ME_FWD_REF_IDX4_G12;
1060     bindingTable->dwMEFwdRefPicIdx[5] = CODECHAL_ENCODE_ME_FWD_REF_IDX5_G12;
1061     bindingTable->dwMEFwdRefPicIdx[6] = CODECHAL_ENCODE_ME_FWD_REF_IDX6_G12;
1062     bindingTable->dwMEFwdRefPicIdx[7] = CODECHAL_ENCODE_ME_FWD_REF_IDX7_G12;
1063     bindingTable->dwMECurrForBwdRef = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G12;
1064     bindingTable->dwMEBwdRefPicIdx[0] = CODECHAL_ENCODE_ME_BWD_REF_IDX0_G12;
1065     bindingTable->dwMEBwdRefPicIdx[1] = CODECHAL_ENCODE_ME_BWD_REF_IDX1_G12;
1066     bindingTable->dwVdencStreamInSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G12;
1067     bindingTable->dwVdencStreamInInputSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G12;
1068 #endif
1069 
1070     return eStatus;
1071 }
1072 
SetCurbeMe(MeCurbeParams * params)1073 MOS_STATUS CodechalVdencVp9StateG12::SetCurbeMe(
1074     MeCurbeParams* params)
1075 {
1076     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1077 
1078     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1079     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
1080 
1081     CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
1082 
1083     uint32_t scaleFactor = 0;
1084     bool useMvFromPrevStep = false, writeDistortions = false;
1085     uint8_t mvShiftFactor = 0, prevMvReadPosFactor = 0;
1086     bool framePicture = CodecHal_PictureIsFrame(params->CurrOriginalPic);
1087     char qpPrimeY = (params->pic_init_qp_minus26 + 26) + params->slice_qp_delta;
1088 
1089     switch (params->hmeLvl)
1090     {
1091     case HME_LEVEL_32x:
1092         useMvFromPrevStep    = m_hmeFirstStep;
1093         writeDistortions     = false;
1094         scaleFactor          = SCALE_FACTOR_32x;
1095         mvShiftFactor        = m_mvShiftFactor32x;
1096         break;
1097     case HME_LEVEL_16x:
1098         useMvFromPrevStep    = (params->b32xMeEnabled) ? m_hmeFollowingStep : m_hmeFirstStep;
1099         writeDistortions     = false;
1100         scaleFactor          = SCALE_FACTOR_16x;
1101         mvShiftFactor        = m_mvShiftFactor16x;
1102         prevMvReadPosFactor  = m_prevMvReadPosition16x;
1103         break;
1104     case HME_LEVEL_4x:
1105         useMvFromPrevStep     = (params->b16xMeEnabled) ? m_hmeFollowingStep : m_hmeFirstStep;
1106         writeDistortions      = true;
1107         scaleFactor           = SCALE_FACTOR_4x;
1108         mvShiftFactor         = m_mvShiftFactor4x;
1109         prevMvReadPosFactor   = m_prevMvReadPosition4x;
1110         break;
1111     default:
1112         return MOS_STATUS_INVALID_PARAMETER;
1113     }
1114 
1115     MeCurbe cmd;
1116     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1117         &cmd,
1118         sizeof(MeCurbe),
1119         meCurbeInit,
1120         sizeof(MeCurbe)));
1121 
1122     cmd.DW3.SubPelMode = 3;
1123     if (m_fieldScalingOutputInterleaved)
1124     {
1125         cmd.DW3.SrcAccess =
1126             cmd.DW3.RefAccess = CodecHal_PictureIsField(params->CurrOriginalPic) ? 1 : 0;
1127         cmd.DW7.SrcFieldPolarity = CodecHal_PictureIsBottomField(params->CurrOriginalPic) ? 1 : 0;
1128     }
1129 
1130     cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
1131     cmd.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
1132     cmd.DW5.QpPrimeY = qpPrimeY;
1133     cmd.DW6.WriteDistortions = writeDistortions;
1134     cmd.DW6.UseMvFromPrevStep = useMvFromPrevStep;
1135 
1136     cmd.DW6.SuperCombineDist = m_superCombineDistGeneric[params->TargetUsage];
1137     cmd.DW6.MaxVmvR = (framePicture) ?
1138         params->MaxMvLen * 4 : (params->MaxMvLen >> 1) * 4;
1139 
1140     if (m_pictureCodingType == B_TYPE)
1141     {
1142         // This field is irrelevant since we are not using the bi-direct search.
1143         cmd.DW1.BiWeight = 32;
1144         cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
1145     }
1146 
1147     if (m_pictureCodingType == P_TYPE ||
1148         m_pictureCodingType == B_TYPE)
1149     {
1150         if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1151         {
1152             cmd.DW30.ActualMBHeight = m_frameHeight;
1153             cmd.DW30.ActualMBWidth = m_frameWidth;
1154         }
1155         else if (m_vdencEnabled && m_16xMeSupported)
1156         {
1157             cmd.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
1158             cmd.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
1159         }
1160         cmd.DW13.NumRefIdxL0MinusOne =
1161             params->num_ref_idx_l0_active_minus1;
1162     }
1163 
1164     cmd.DW13.RefStreaminCost = 5;
1165     // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
1166     cmd.DW13.ROIEnable = 0;
1167 
1168     if (!framePicture)
1169     {
1170         if (m_pictureCodingType != I_TYPE)
1171         {
1172             cmd.DW14.List0RefID0FieldParity = params->List0RefID0FieldParity;
1173             cmd.DW14.List0RefID1FieldParity = params->List0RefID1FieldParity;
1174             cmd.DW14.List0RefID2FieldParity = params->List0RefID2FieldParity;
1175             cmd.DW14.List0RefID3FieldParity = params->List0RefID3FieldParity;
1176             cmd.DW14.List0RefID4FieldParity = params->List0RefID4FieldParity;
1177             cmd.DW14.List0RefID5FieldParity = params->List0RefID5FieldParity;
1178             cmd.DW14.List0RefID6FieldParity = params->List0RefID6FieldParity;
1179             cmd.DW14.List0RefID7FieldParity = params->List0RefID7FieldParity;
1180         }
1181         if (m_pictureCodingType == B_TYPE)
1182         {
1183             cmd.DW14.List1RefID0FieldParity = params->List1RefID0FieldParity;
1184             cmd.DW14.List1RefID1FieldParity = params->List1RefID1FieldParity;
1185         }
1186     }
1187 
1188     cmd.DW15.MvShiftFactor = mvShiftFactor;
1189     cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
1190 
1191     // r3 & r4
1192     uint8_t targetUsage = params->TargetUsage;
1193     uint8_t meMethod = 0;
1194     if (m_pictureCodingType == B_TYPE)
1195     {
1196         meMethod = params->pBMEMethodTable ?
1197             params->pBMEMethodTable[targetUsage]
1198             : m_bMeMethodGeneric[targetUsage];
1199     }
1200     else
1201     {
1202         meMethod = params->pMEMethodTable ?
1203             params->pMEMethodTable[targetUsage]
1204             : m_meMethodGeneric[targetUsage];
1205     }
1206 
1207     uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
1208     eStatus = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t));
1209     if (eStatus != MOS_STATUS_SUCCESS)
1210     {
1211         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
1212         return eStatus;
1213     }
1214 
1215     // Non legacy stream in is for hevc vp9 streamin kernel
1216     if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
1217     {
1218         //StreamIn CURBE
1219         cmd.DW6.LCUSize = 1;//Only LCU64 supported by the VDEnc HW
1220         cmd.DW6.InputStreamInSurfaceEnable = params->segmapProvided;
1221         cmd.DW31.MaxCuSize                 = 3;
1222         cmd.DW31.MaxTuSize                 = 3;
1223         cmd.DW31.NumImePredictors          = CODECHAL_VDENC_NUMIMEPREDICTORS;
1224         switch (params->TargetUsage)
1225         {
1226         case 1: // Quality mode
1227         case 2:
1228         case 4: // Normal mode
1229             cmd.DW36.NumMergeCandidateCu64x64 = 4;
1230             cmd.DW36.NumMergeCandidateCu32x32 = 3;
1231             cmd.DW36.NumMergeCandidateCu16x16 = 2;
1232             cmd.DW36.NumMergeCandidateCu8x8   = 1;
1233             break;
1234         case 7: // Speed mode
1235             cmd.DW36.NumMergeCandidateCu64x64 = 2;
1236             cmd.DW36.NumMergeCandidateCu32x32 = 2;
1237             cmd.DW36.NumMergeCandidateCu16x16 = 2;
1238             cmd.DW36.NumMergeCandidateCu8x8   = 0;
1239             cmd.DW31.NumImePredictors         = CODECHAL_VDENC_NUMIMEPREDICTORS_SPEED;
1240             break;
1241         default:
1242             MHW_ASSERTMESSAGE("Invalid TU provided!");
1243             return MOS_STATUS_INVALID_PARAMETER;
1244         }
1245     }
1246 
1247     // r5
1248     cmd.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G12;
1249     cmd.DW41._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ?
1250         CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G12 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G12;
1251     cmd.DW42._4xMeOutputDistSurfIndex = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G12;
1252     cmd.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_ENCODE_ME_BRC_DISTORTION_G12;
1253     cmd.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G12;
1254     cmd.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G12;
1255     cmd.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G12;
1256     cmd.DW47.VDEncStreamInInputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G12;
1257 
1258     CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
1259         &cmd,
1260         params->pKernelState->dwCurbeOffset,
1261         sizeof(cmd)));
1262 
1263     return eStatus;
1264 }
1265 
SendMeSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,MeSurfaceParams * params)1266 MOS_STATUS CodechalVdencVp9StateG12::SendMeSurfaces(
1267     PMOS_COMMAND_BUFFER cmdBuffer,
1268     MeSurfaceParams* params)
1269 {
1270     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1271 
1272     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1273     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1274     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
1275     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
1276     CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
1277     CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
1278 
1279     if (!params->bVdencStreamInEnabled)
1280     {
1281         CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
1282     }
1283     else
1284     {
1285         CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeVdencStreamInBuffer);
1286     }
1287 
1288     CODECHAL_MEDIA_STATE_TYPE encMediaStateType = (params->b32xMeInUse) ? CODECHAL_MEDIA_STATE_32X_ME :
1289         params->b16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
1290 
1291     if (params->bVdencStreamInEnabled && encMediaStateType == CODECHAL_MEDIA_STATE_4X_ME)
1292     {
1293         encMediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
1294     }
1295 
1296     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
1297     MeKernelBindingTable* meBindingTable = params->pMeBindingTable;
1298 
1299     bool isFieldPicture = CodecHal_PictureIsField(*(params->pCurrOriginalPic)) ? 1 : 0;
1300     bool isBottomField = CodecHal_PictureIsBottomField(*(params->pCurrOriginalPic)) ? 1 : 0;
1301     uint8_t currVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
1302         ((isBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1303 
1304     PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
1305     uint32_t meMvBottomFieldOffset = 0, currScaledBottomFieldOffset = 0;
1306     if (params->b32xMeInUse)
1307     {
1308         CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
1309         currScaledSurface = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1310         meMvDataBuffer = params->ps32xMeMvDataBuffer;
1311         meMvBottomFieldOffset = params->dw32xMeMvBottomFieldOffset;
1312         currScaledBottomFieldOffset = params->dw32xScaledBottomFieldOffset;
1313     }
1314     else if (params->b16xMeInUse)
1315     {
1316         CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
1317         currScaledSurface = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1318         meMvDataBuffer = params->ps16xMeMvDataBuffer;
1319         meMvBottomFieldOffset = params->dw16xMeMvBottomFieldOffset;
1320         currScaledBottomFieldOffset = params->dw16xScaledBottomFieldOffset;
1321     }
1322     else
1323     {
1324         currScaledSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1325         meMvDataBuffer = params->ps4xMeMvDataBuffer;
1326         meMvBottomFieldOffset = params->dw4xMeMvBottomFieldOffset;
1327         currScaledBottomFieldOffset = params->dw4xScaledBottomFieldOffset;
1328     }
1329 
1330     // Reference height and width information should be taken from the current scaled surface rather
1331     // than from the reference scaled surface in the case of PAFF.
1332 
1333     uint32_t width = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
1334     uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
1335 
1336     // Force the values
1337     meMvDataBuffer->dwWidth = width;
1338     meMvDataBuffer->dwHeight = height;
1339     meMvDataBuffer->dwPitch = width;
1340 
1341     CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
1342     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1343     surfaceParams.bIs2DSurface = true;
1344     surfaceParams.bMediaBlockRW = true;
1345     surfaceParams.psSurface = meMvDataBuffer;
1346     surfaceParams.dwOffset = meMvBottomFieldOffset;
1347     surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
1348     surfaceParams.dwBindingTableOffset = meBindingTable->dwMEMVDataSurface;
1349     surfaceParams.bIsWritable = true;
1350     surfaceParams.bRenderTarget = true;
1351     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1352         m_hwInterface,
1353         cmdBuffer,
1354         &surfaceParams,
1355         params->pKernelState));
1356 
1357     if (params->b16xMeInUse && params->b32xMeEnabled)
1358     {
1359         // Pass 32x MV to 16x ME operation
1360         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1361         surfaceParams.bIs2DSurface = true;
1362         surfaceParams.bMediaBlockRW = true;
1363         surfaceParams.psSurface = params->ps32xMeMvDataBuffer;
1364         surfaceParams.dwOffset =
1365             isBottomField ? params->dw32xMeMvBottomFieldOffset : 0;
1366         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
1367         surfaceParams.dwBindingTableOffset = meBindingTable->dw32xMEMVDataSurface;
1368         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1369             m_hwInterface,
1370             cmdBuffer,
1371             &surfaceParams,
1372             params->pKernelState));
1373     }
1374     else if (!params->b32xMeInUse && params->b16xMeEnabled)
1375     {
1376         // Pass 16x MV to 4x ME operation
1377         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1378         surfaceParams.bIs2DSurface = true;
1379         surfaceParams.bMediaBlockRW = true;
1380         surfaceParams.psSurface = params->ps16xMeMvDataBuffer;
1381         surfaceParams.dwOffset =
1382             isBottomField ? params->dw16xMeMvBottomFieldOffset : 0;
1383         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
1384         surfaceParams.dwBindingTableOffset = meBindingTable->dw16xMEMVDataSurface;
1385         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1386             m_hwInterface,
1387             cmdBuffer,
1388             &surfaceParams,
1389             params->pKernelState));
1390     }
1391 
1392     // Insert Distortion buffers only for 4xMe case
1393     if (!params->b32xMeInUse && !params->b16xMeInUse)
1394     {
1395         if (!params->bVdencStreamInEnabled)
1396         {
1397             MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1398             surfaceParams.bIs2DSurface = true;
1399             surfaceParams.bMediaBlockRW = true;
1400             surfaceParams.psSurface = params->psMeBrcDistortionBuffer;
1401             surfaceParams.dwOffset = params->dwMeBrcDistortionBottomFieldOffset;
1402             surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBRCDist;
1403             surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1404             surfaceParams.bIsWritable = true;
1405             surfaceParams.bRenderTarget = true;
1406             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1407                 m_hwInterface,
1408                 cmdBuffer,
1409                 &surfaceParams,
1410                 params->pKernelState));
1411         }
1412 
1413         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1414         surfaceParams.bIs2DSurface = true;
1415         surfaceParams.bMediaBlockRW = true;
1416         surfaceParams.psSurface = params->psMeDistortionBuffer;
1417         surfaceParams.dwOffset = params->dwMeDistortionBottomFieldOffset;
1418         surfaceParams.dwBindingTableOffset = meBindingTable->dwMEDist;
1419         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
1420         surfaceParams.bIsWritable = true;
1421         surfaceParams.bRenderTarget = true;
1422         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1423             m_hwInterface,
1424             cmdBuffer,
1425             &surfaceParams,
1426             params->pKernelState));
1427     }
1428 
1429     // Setup references 1...n
1430     // LIST 0 references
1431     CODEC_PICTURE refPic;
1432     bool isRefFieldPicture = false, isRefBottomField = false;
1433     uint8_t refPicIdx = 0;
1434     if (params->pL0RefFrameList)
1435     {
1436         for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
1437         {
1438             refPic = params->pL0RefFrameList[refIdx];
1439 
1440             if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
1441             {
1442                 if (refIdx == 0)
1443                 {
1444                     // Current Picture Y - VME
1445                     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1446                     surfaceParams.bUseAdvState = true;
1447                     surfaceParams.psSurface = currScaledSurface;
1448                     surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
1449                     surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1450                     surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForFwdRef;
1451                     surfaceParams.ucVDirection = currVDirection;
1452                     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1453                         m_hwInterface,
1454                         cmdBuffer,
1455                         &surfaceParams,
1456                         params->pKernelState));
1457                 }
1458 
1459                 isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
1460                 isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
1461                 refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
1462                 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
1463                 uint32_t refScaledBottomFieldOffset = 0;
1464                 MOS_SURFACE *refScaledSurface;
1465                 if (params->b32xMeInUse)
1466                 {
1467                     refScaledSurface           = m_trackedBuf->Get32xDsSurface(scaledIdx);
1468                 }
1469                 else if (params->b16xMeInUse)
1470                 {
1471                     refScaledSurface           = m_trackedBuf->Get16xDsSurface(scaledIdx);
1472                 }
1473                 else
1474                 {
1475                     refScaledSurface           = m_trackedBuf->Get4xDsSurface(scaledIdx);
1476                 }
1477                 refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
1478 
1479                 // L0 Reference Picture Y - VME
1480                 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1481                 surfaceParams.bUseAdvState = true;
1482                 surfaceParams.psSurface = refScaledSurface;
1483                 surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
1484                 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1485                 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx];
1486                 surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME :
1487                     ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1488                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1489                     m_hwInterface,
1490                     cmdBuffer,
1491                     &surfaceParams,
1492                     params->pKernelState));
1493 
1494                 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx] + 1;
1495                 surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME :
1496                     ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1497                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1498                     m_hwInterface,
1499                     cmdBuffer,
1500                     &surfaceParams,
1501                     params->pKernelState));
1502             }
1503         }
1504     }
1505 
1506     // Setup references 1...n
1507     // LIST 1 references
1508     if (params->pL1RefFrameList)
1509     {
1510         for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
1511         {
1512             refPic = params->pL1RefFrameList[refIdx];
1513 
1514             if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
1515             {
1516                 if (refIdx == 0)
1517                 {
1518                     // Current Picture Y - VME
1519                     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1520                     surfaceParams.bUseAdvState = true;
1521                     surfaceParams.psSurface = currScaledSurface;
1522                     surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
1523                     surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1524                     surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForBwdRef;
1525                     surfaceParams.ucVDirection = currVDirection;
1526                     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1527                         m_hwInterface,
1528                         cmdBuffer,
1529                         &surfaceParams,
1530                         params->pKernelState));
1531                 }
1532 
1533                 isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
1534                 isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
1535                 refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
1536                 uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
1537                 uint32_t refScaledBottomFieldOffset = 0;
1538                 MOS_SURFACE *refScaledSurface;
1539                 if (params->b32xMeInUse)
1540                 {
1541                     refScaledSurface           = m_trackedBuf->Get32xDsSurface(scaledIdx);
1542                 }
1543                 else if (params->b16xMeInUse)
1544                 {
1545                     refScaledSurface           = m_trackedBuf->Get16xDsSurface(scaledIdx);
1546                 }
1547                 else
1548                 {
1549                     refScaledSurface           = m_trackedBuf->Get4xDsSurface(scaledIdx);
1550                 }
1551                 refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
1552 
1553 
1554                 // L1 Reference Picture Y - VME
1555                 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1556                 surfaceParams.bUseAdvState = true;
1557                 surfaceParams.psSurface = refScaledSurface;
1558                 surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
1559                 surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
1560                 surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBwdRefPicIdx[refIdx];
1561                 surfaceParams.ucVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
1562                     ((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
1563                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1564                     m_hwInterface,
1565                     cmdBuffer,
1566                     &surfaceParams,
1567                     params->pKernelState));
1568             }
1569         }
1570     }
1571 
1572     if (encMediaStateType == CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN)
1573     {
1574         // Output buffer
1575         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1576         surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
1577         surfaceParams.bIs2DSurface = false;
1578         surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
1579         surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInSurface;
1580         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1581         surfaceParams.bIsWritable = true;
1582         surfaceParams.bRenderTarget = true;
1583         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1584             m_hwInterface,
1585             cmdBuffer,
1586             &surfaceParams,
1587             params->pKernelState));
1588 
1589         // Input buffer (for AVC case we only read the surface and update data)
1590         MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
1591         surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
1592         surfaceParams.bIs2DSurface = false;
1593         surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
1594         surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInInputSurface;
1595         surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
1596         surfaceParams.bIsWritable = true;
1597         surfaceParams.bRenderTarget = true;
1598         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
1599             m_hwInterface,
1600             cmdBuffer,
1601             &surfaceParams,
1602             params->pKernelState));
1603     }
1604 
1605     return eStatus;
1606 }
1607 
InitInterface()1608 MOS_STATUS CodechalVdencVp9StateG12::InitInterface()
1609 {
1610     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1611 
1612     CODECHAL_ENCODE_FUNCTION_ENTER;
1613 
1614     uint8_t* binary = nullptr;
1615     uint32_t kernelSize = 0;
1616     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
1617         m_kernelBase,
1618         m_kuidCommon,
1619         &binary,
1620         &kernelSize));
1621 
1622     GetHwInterface()->GetStateHeapSettings()->dwIshSize +=
1623         MOS_ALIGN_CEIL(kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
1624 
1625     return eStatus;
1626 }
1627 
SetMeSurfaceParams(MeSurfaceParams * meSurfaceParams)1628 MOS_STATUS CodechalVdencVp9StateG12::SetMeSurfaceParams(MeSurfaceParams *meSurfaceParams)
1629 {
1630     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1631 
1632     CODECHAL_ENCODE_FUNCTION_ENTER;
1633 
1634     CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
1635 
1636     meSurfaceParams->bMbaff = false;
1637     meSurfaceParams->b4xMeDistortionBufferSupported = true;
1638     meSurfaceParams->dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
1639     meSurfaceParams->dwNumRefIdxL1ActiveMinus1 = 0;
1640 
1641     MOS_ZeroMemory(&m_refPicList0, sizeof(m_refPicList0));
1642 
1643     if (m_lastRefPic)
1644     {
1645         m_refPicList0[0].FrameIdx = m_vp9PicParams->RefFlags.fields.LastRefIdx;
1646         m_refPicList0[0].PicFlags = PICTURE_FRAME;
1647     }
1648     if (m_goldenRefPic)
1649     {
1650         m_refPicList0[1].FrameIdx = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
1651         m_refPicList0[1].PicFlags = PICTURE_FRAME;
1652     }
1653     if (m_altRefPic)
1654     {
1655         m_refPicList0[2].FrameIdx = m_vp9PicParams->RefFlags.fields.AltRefIdx;
1656         m_refPicList0[2].PicFlags = PICTURE_FRAME;
1657     }
1658 
1659     meSurfaceParams->pL0RefFrameList = &(m_refPicList0[0]);
1660     meSurfaceParams->ppRefList = &m_refList[0];
1661     meSurfaceParams->pPicIdx = &m_picIdx[0];
1662     meSurfaceParams->pCurrOriginalPic = &m_currOriginalPic;
1663     meSurfaceParams->ps4xMeMvDataBuffer = &m_4xMeMvDataBuffer;
1664     meSurfaceParams->ps16xMeMvDataBuffer = &m_16xMeMvDataBuffer;
1665     meSurfaceParams->psMeDistortionBuffer = &m_4xMeDistortionBuffer;
1666     meSurfaceParams->dwVerticalLineStride = m_verticalLineStride;
1667     meSurfaceParams->dwVerticalLineStrideOffset = m_verticalLineStrideOffset;
1668     meSurfaceParams->b32xMeEnabled = m_32xMeSupported;
1669     meSurfaceParams->b16xMeEnabled = m_16xMeEnabled;
1670     meSurfaceParams->pMeBindingTable = &m_meBindingTable;
1671     meSurfaceParams->bVdencStreamInEnabled = true;
1672     meSurfaceParams->psMeVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1673     meSurfaceParams->dwVDEncStreamInSurfaceSize = MOS_BYTES_TO_DWORDS((MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
1674         (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
1675         CODECHAL_CACHELINE_SIZE);
1676     return eStatus;
1677 }
1678 
SetMeCurbeParams(MeCurbeParams * meParams)1679 MOS_STATUS CodechalVdencVp9StateG12::SetMeCurbeParams(MeCurbeParams *meParams)
1680 {
1681     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1682 
1683     CODECHAL_ENCODE_FUNCTION_ENTER;
1684 
1685     CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
1686 
1687     meParams->b16xMeEnabled = m_16xMeEnabled;
1688     meParams->b32xMeEnabled = m_32xMeSupported;
1689     meParams->TargetUsage = TU_QUALITY;
1690     meParams->MaxMvLen = m_hmeMaxMvLength;
1691     meParams->CurrOriginalPic.FrameIdx = m_vp9PicParams->CurrOriginalPic.FrameIdx;
1692     meParams->CurrOriginalPic.PicEntry = m_vp9PicParams->CurrOriginalPic.PicEntry;
1693     meParams->CurrOriginalPic.PicFlags = m_vp9PicParams->CurrOriginalPic.PicFlags;
1694     meParams->pic_init_qp_minus26 = m_vp9PicParams->LumaACQIndex - 26;
1695     meParams->num_ref_idx_l0_active_minus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
1696     meParams->num_ref_idx_l1_active_minus1 = 0;
1697 
1698     return eStatus;
1699 }
1700 
ExecuteMeKernel(MeCurbeParams * meParams,MeSurfaceParams * meSurfaceParams,HmeLevel hmeLevel)1701 MOS_STATUS CodechalVdencVp9StateG12::ExecuteMeKernel(
1702     MeCurbeParams *meParams,
1703     MeSurfaceParams *meSurfaceParams,
1704     HmeLevel hmeLevel)
1705 {
1706     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1707 
1708     CODECHAL_ENCODE_FUNCTION_ENTER;
1709 
1710     CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
1711     CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
1712 
1713     PerfTagSetting perfTag;
1714     perfTag.Value = 0;
1715     perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1716     perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL;
1717     perfTag.PictureCodingType = m_pictureCodingType;
1718     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1719     // Each ME kernel buffer counts as a separate perf task
1720     m_osInterface->pfnResetPerfBufferID(m_osInterface);
1721 
1722     CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
1723         (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
1724 
1725     bool vdencMeInUse = false;
1726     if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME))
1727     {
1728         vdencMeInUse = true;
1729         // Non legacy stream in is for hevc vp9 streamin kernel
1730         encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
1731     }
1732 
1733     uint32_t krnStateIdx = vdencMeInUse ?
1734         CODECHAL_ENCODE_ME_IDX_VDENC :
1735         ((m_pictureCodingType == P_TYPE) ? CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B);
1736 
1737     PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx];
1738 
1739     // If Single Task Phase is not enabled, use BT count for the kernel state.
1740     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
1741     {
1742         uint32_t maxBtCount = m_singleTaskPhaseSupported ?
1743             m_maxBtCount : kernelState->KernelParams.iBTCount;
1744         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
1745             m_stateHeapInterface,
1746             maxBtCount));
1747         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
1748         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
1749     }
1750 
1751     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
1752         m_stateHeapInterface,
1753         kernelState,
1754         false,
1755         0,
1756         false,
1757         m_storeData));
1758     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
1759     MOS_ZeroMemory(&idParams, sizeof(idParams));
1760     idParams.pKernelState = kernelState;
1761     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
1762         m_stateHeapInterface,
1763         1,
1764         &idParams));
1765 
1766     // Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here)
1767     meParams->hmeLvl = hmeLevel;
1768     meParams->pKernelState = kernelState;
1769 
1770     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMe(meParams));
1771 
1772     CODECHAL_DEBUG_TOOL(
1773         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1774             encFunctionType,
1775             MHW_DSH_TYPE,
1776             kernelState));
1777     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
1778         encFunctionType,
1779         kernelState));
1780     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1781         encFunctionType,
1782         MHW_ISH_TYPE,
1783         kernelState));
1784     )
1785     MOS_COMMAND_BUFFER cmdBuffer;
1786     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
1787     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1788     SendKernelCmdsParams sendKernelCmdsParams;
1789     sendKernelCmdsParams = SendKernelCmdsParams();
1790     sendKernelCmdsParams.EncFunctionType = encFunctionType;
1791     sendKernelCmdsParams.pKernelState = kernelState;
1792 
1793     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
1794 
1795     // Add binding table
1796     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
1797         m_stateHeapInterface,
1798         kernelState));
1799 
1800     // Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here)
1801     meSurfaceParams->dwDownscaledWidthInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x :
1802         (hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x;
1803     meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x :
1804         (hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x;
1805     meSurfaceParams->b32xMeInUse = (hmeLevel == HME_LEVEL_32x) ? true : false;
1806     meSurfaceParams->b16xMeInUse = (hmeLevel == HME_LEVEL_16x) ? true : false;
1807     meSurfaceParams->pKernelState = kernelState;
1808 
1809     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(&cmdBuffer, meSurfaceParams));
1810 
1811     // Dump SSH for ME kernel
1812     CODECHAL_DEBUG_TOOL(
1813         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
1814             encFunctionType,
1815             MHW_SSH_TYPE,
1816             kernelState)));
1817 
1818     /* zero out the mv data memory and me distortion buffer for the driver ULT
1819     kernel only writes out this data used for current frame, in some cases the the data used for
1820     previous frames would be left in the buffer (for example, the L1 mv for B frame would still show
1821     in the P frame mv data buffer */
1822 
1823     // Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled
1824     CODECHAL_DEBUG_TOOL(
1825         CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface);
1826     uint8_t* data = NULL;
1827     uint32_t size = 0;
1828     bool driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType);
1829 
1830     if (driverMeDumpEnabled)
1831     {
1832         MOS_LOCK_PARAMS lockFlags;
1833         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1834         lockFlags.WriteOnly = 1;
1835 
1836         switch (hmeLevel)
1837         {
1838         case HME_LEVEL_32x:
1839             data = (uint8_t*)m_osInterface->pfnLockResource(
1840                 m_osInterface,
1841                 &meSurfaceParams->ps32xMeMvDataBuffer->OsResource,
1842                 &lockFlags);
1843             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1844             size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) *
1845                 (m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1846             MOS_ZeroMemory(data, size);
1847             m_osInterface->pfnUnlockResource(
1848                 m_osInterface,
1849                 &meSurfaceParams->ps32xMeMvDataBuffer->OsResource);
1850             break;
1851         case HME_LEVEL_16x:
1852             data = (uint8_t*)m_osInterface->pfnLockResource(
1853                 m_osInterface,
1854                 &meSurfaceParams->ps16xMeMvDataBuffer->OsResource,
1855                 &lockFlags);
1856             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1857             size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) *
1858                 (m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1859             MOS_ZeroMemory(data, size);
1860             m_osInterface->pfnUnlockResource(
1861                 m_osInterface,
1862                 &meSurfaceParams->ps16xMeMvDataBuffer->OsResource);
1863             break;
1864         case HME_LEVEL_4x:
1865             if (!m_vdencEnabled)
1866             {
1867                 data = (uint8_t*)m_osInterface->pfnLockResource(
1868                     m_osInterface,
1869                     &meSurfaceParams->ps4xMeMvDataBuffer->OsResource,
1870                     &lockFlags);
1871                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1872                 size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) *
1873                     (m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
1874                 MOS_ZeroMemory(data, size);
1875                 m_osInterface->pfnUnlockResource(
1876                     m_osInterface,
1877                     &meSurfaceParams->ps4xMeMvDataBuffer->OsResource);
1878             }
1879             break;
1880         default:
1881             return MOS_STATUS_INVALID_PARAMETER;
1882         }
1883 
1884         // zeroing out ME dist buffer
1885         if (meSurfaceParams->b4xMeDistortionBufferSupported)
1886         {
1887             data = (uint8_t*)m_osInterface->pfnLockResource(
1888                 m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags);
1889             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1890             size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch;
1891             MOS_ZeroMemory(data, size);
1892             m_osInterface->pfnUnlockResource(
1893                 m_osInterface,
1894                 &meSurfaceParams->psMeDistortionBuffer->OsResource);
1895         }
1896     }
1897     );
1898 
1899     uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
1900         (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
1901 
1902     uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
1903     uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
1904 
1905     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
1906     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
1907     walkerCodecParams.WalkerMode = m_walkerMode;
1908     walkerCodecParams.dwResolutionX = resolutionX;
1909     walkerCodecParams.dwResolutionY = resolutionY;
1910     walkerCodecParams.bNoDependency = true;
1911     walkerCodecParams.bMbaff = meSurfaceParams->bMbaff;
1912     walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
1913     walkerCodecParams.ucGroupId = m_groupId;
1914 
1915     MHW_WALKER_PARAMS walkerParams;
1916     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
1917         m_hwInterface,
1918         &walkerParams,
1919         &walkerCodecParams));
1920 
1921     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
1922         &cmdBuffer,
1923         &walkerParams));
1924 
1925     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
1926 
1927     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
1928         m_stateHeapInterface,
1929         kernelState));
1930 
1931     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
1932 
1933     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1934     {
1935         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
1936             m_stateHeapInterface));
1937         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1938     }
1939 
1940     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
1941         &cmdBuffer,
1942         encFunctionType,
1943         nullptr)));
1944 
1945     m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
1946 
1947     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
1948 
1949     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1950     {
1951         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
1952         m_lastTaskInPhase = false;
1953     }
1954 
1955     return eStatus;
1956 }
1957 
ExecuteKernelFunctions()1958 MOS_STATUS CodechalVdencVp9StateG12::ExecuteKernelFunctions()
1959 {
1960     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1961 
1962     CODECHAL_ENCODE_FUNCTION_ENTER;
1963 
1964 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
1965     uint32_t dumpFormat = 0;
1966     CODECHAL_DEBUG_TOOL(
1967     //    CodecHal_DbgMapSurfaceFormatToDumpFormat(m_rawSurfaceToEnc->Format, &dumpFormat);
1968     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1969         m_rawSurfaceToEnc,
1970         CodechalDbgAttr::attrEncodeRawInputSurface,
1971         "SrcSurf"));
1972 
1973     if (m_lastRefPic)
1974     {
1975     //    CodecHal_DbgMapSurfaceFormatToDumpFormat(m_lastRefPic->Format, &dumpFormat);
1976         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1977             m_lastRefPic,
1978             CodechalDbgAttr::attrReferenceSurfaces,
1979             "LastRefSurface"));
1980     }
1981 
1982     if (m_goldenRefPic)
1983     {
1984      //   CodecHal_DbgMapSurfaceFormatToDumpFormat(m_goldenRefPic->Format, &dumpFormat);
1985         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1986             m_goldenRefPic,
1987             CodechalDbgAttr::attrReferenceSurfaces,
1988             "GoldenRefSurface"));
1989     }
1990 
1991     if (m_altRefPic)
1992     {
1993     //    CodecHal_DbgMapSurfaceFormatToDumpFormat(m_altRefPic->Format, &dumpFormat);
1994         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1995             m_altRefPic,
1996             CodechalDbgAttr::attrReferenceSurfaces,
1997             "_AltRefSurface"));
1998     }
1999     );
2000 
2001     m_setRequestedEUSlices = ((m_frameHeight * m_frameWidth) >= m_ssdResolutionThreshold &&
2002         m_targetUsage <= m_ssdTargetUsageThreshold) ? true : false;
2003 
2004     m_hwInterface->m_numRequestedEuSlices = (m_setRequestedEUSlices) ?
2005         m_sliceShutdownRequestState : m_sliceShutdownDefaultState;
2006 
2007     // While this streamin isn't a kernel function, we 0 the surface here which is needed before HME kernel
2008     SetupSegmentationStreamIn();
2009     if (m_16xMeSupported)
2010     {
2011         //4x Downscaling
2012         CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
2013         MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
2014         cscScalingKernelParams.bLastTaskInPhaseCSC =
2015             cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled);
2016         cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
2017         cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
2018 
2019         m_firstTaskInPhase = true;
2020         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
2021     }
2022 
2023     if (m_16xMeEnabled)
2024     {
2025         //Initialize the ME struct for HME kernel calls
2026         MeCurbeParams meParams;
2027         MOS_ZeroMemory(&meParams, sizeof(MeCurbeParams));
2028         SetMeCurbeParams(&meParams);
2029 
2030         MeSurfaceParams meSurfaceParams;
2031         MOS_ZeroMemory(&meSurfaceParams, sizeof(MeSurfaceParams));
2032         SetMeSurfaceParams(&meSurfaceParams);
2033 
2034         // P_HME kernel (16x HME)
2035         m_lastTaskInPhase = false;
2036         CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_16x));
2037 
2038         //StreamIn kernel, 4xME
2039         m_lastTaskInPhase = true;
2040         meParams.segmapProvided = m_segmentMapProvided;
2041         CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_4x));
2042     }
2043 
2044     if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2045     {
2046         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2047         syncParams.GpuContext = m_renderContext;
2048         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2049 
2050         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2051         m_waitForEnc = true;
2052     }
2053 #endif
2054 
2055     return eStatus;
2056 }
2057 
StatusReportCleanup(EncodeStatusReport * encodeStatusReport,HCPPakHWTileSizeRecord_G12 * tileStatusReport,CODECHAL_ENCODE_BUFFER * tileSizeStreamoutBuffer,PMOS_INTERFACE osInterface,uint8_t * tempBsBuffer,uint8_t * bitstream)2058 static void StatusReportCleanup(
2059     EncodeStatusReport* encodeStatusReport,
2060     HCPPakHWTileSizeRecord_G12* tileStatusReport,
2061     CODECHAL_ENCODE_BUFFER* tileSizeStreamoutBuffer,
2062     PMOS_INTERFACE osInterface,
2063     uint8_t* tempBsBuffer,
2064     uint8_t* bitstream)
2065 {
2066 
2067     if (tempBsBuffer)
2068     {
2069         MOS_FreeMemory(tempBsBuffer);
2070     }
2071 
2072     if (bitstream)
2073     {
2074         osInterface->pfnUnlockResource(osInterface, &encodeStatusReport->pCurrRefList->resBitstreamBuffer);
2075     }
2076 
2077     if (tileStatusReport)
2078     {
2079         // clean-up the tile status report buffer
2080         if (encodeStatusReport->CodecStatus == CODECHAL_STATUS_SUCCESSFUL)
2081         {
2082             for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2083             {
2084                 MOS_ZeroMemory(&tileStatusReport[i], sizeof(tileStatusReport[i]));
2085             }
2086         }
2087 
2088         osInterface->pfnUnlockResource(osInterface, &tileSizeStreamoutBuffer->sResource);
2089     }
2090 }
2091 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)2092 MOS_STATUS CodechalVdencVp9StateG12::GetStatusReport(
2093     EncodeStatus*       encodeStatus,
2094     EncodeStatusReport* encodeStatusReport)
2095 {
2096     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2097 
2098     CODECHAL_ENCODE_FUNCTION_ENTER;
2099 
2100     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
2101     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
2102 
2103     if (encodeStatusReport->UsedVdBoxNumber == 1)
2104     {
2105         encodeStatusReport->bitstreamSize = encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted;
2106         encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses;
2107         ENCODE_VERBOSEMESSAGE("statusReportData->numberPasses: %d\n", encodeStatusReport->NumberPasses);
2108         encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
2109         return eStatus;
2110     }
2111 
2112     // Tile record always in m_tileRecordBuffer even in scala mode
2113     PCODECHAL_ENCODE_BUFFER presTileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
2114 
2115     MOS_LOCK_PARAMS lockFlags;
2116     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2117     HCPPakHWTileSizeRecord_G12* tileStatusReport = (HCPPakHWTileSizeRecord_G12*)m_osInterface->pfnLockResource(
2118         m_osInterface,
2119         &presTileSizeStatusReport->sResource,
2120         &lockFlags);
2121     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
2122 
2123     encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
2124     encodeStatusReport->PanicMode = false;
2125     encodeStatusReport->AverageQp = 0;
2126     encodeStatusReport->QpY = 0;
2127     encodeStatusReport->SuggestedQpYDelta = 0;
2128     encodeStatusReport->NumberPasses = 1;
2129     encodeStatusReport->bitstreamSize = 0;
2130     encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
2131 
2132     double sum_qp = 0.0;
2133     uint32_t totalCU = 0;
2134     CODECHAL_ENCODE_CHK_COND_RETURN((encodeStatusReport->NumberTilesInFrame == 0), "ERROR - invalid number of tiles in frame");
2135     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2136     {
2137         if (tileStatusReport[i].Length == 0)
2138         {
2139             encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
2140             StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
2141             return eStatus;
2142         }
2143 
2144         encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
2145         totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
2146         sum_qp += tileStatusReport[i].Hcp_Qp_Status_Count;
2147     }
2148 
2149     encodeStatusReport->QpY = encodeStatusReport->AverageQp =
2150         (uint8_t)((sum_qp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
2151 
2152     if (m_enableTileStitchByHW)
2153     {
2154         StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
2155         return eStatus;
2156     }
2157 
2158     uint8_t* bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
2159     uint8_t* tempBsBuffer = bufPtr;
2160     CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
2161 
2162     CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
2163     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2164     lockFlags.ReadOnly = 1;
2165     uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
2166         m_osInterface,
2167         &currRefList.resBitstreamBuffer,
2168         &lockFlags);
2169     if (bitstream == nullptr)
2170     {
2171         MOS_SafeFreeMemory(tempBsBuffer);
2172         CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
2173     }
2174 
2175     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2176     {
2177         uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
2178         uint32_t len = tileStatusReport[i].Length;
2179 
2180         if (offset + len >= m_bitstreamUpperBound)
2181         {
2182             eStatus = MOS_STATUS_INVALID_FILE_SIZE;
2183             CODECHAL_ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
2184             encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
2185             encodeStatusReport->bitstreamSize = 0;
2186             StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
2187             return eStatus;
2188         }
2189 
2190         MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
2191         bufPtr += len;
2192     }
2193 
2194     MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
2195     MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
2196         m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
2197 
2198     StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
2199 
2200     return eStatus;
2201 }
2202 
DecideEncodingPipeNumber()2203 MOS_STATUS CodechalVdencVp9StateG12::DecideEncodingPipeNumber()
2204 {
2205     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2206 
2207     CODECHAL_ENCODE_FUNCTION_ENTER;
2208 
2209     m_numPipe = m_numVdbox;
2210 
2211     uint8_t num_tile_columns = (1 << m_vp9PicParams->log2_tile_columns);
2212 
2213     if (num_tile_columns > m_numPipe)
2214     {
2215         m_numPipe = 1;
2216     }
2217 
2218     if (num_tile_columns < m_numPipe)
2219     {
2220         if (num_tile_columns >= 1 && num_tile_columns <= 4)
2221         {
2222             m_numPipe = num_tile_columns;
2223         }
2224         else
2225         {
2226             m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
2227         }
2228     }
2229 
2230     if (m_numPipe == 0 || m_numPipe > CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE)
2231     {
2232         m_numPipe = 1;
2233     }
2234 
2235     if (m_numPipe > 1)
2236     {
2237         m_scalableMode = true; // KMD VE is now enabled by default. Mediasolo can also use the VE interface.
2238     }
2239     else
2240     {
2241         m_scalableMode = false;
2242     }
2243 
2244     if (m_scalabilityState)
2245     {
2246         // Create/ re-use a GPU context with 2 pipes
2247         m_scalabilityState->ucScalablePipeNum = m_numPipe;
2248     }
2249 
2250     return eStatus;
2251 }
2252 
PlatformCapabilityCheck()2253 MOS_STATUS CodechalVdencVp9StateG12::PlatformCapabilityCheck()
2254 {
2255     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2256 
2257     CODECHAL_ENCODE_FUNCTION_ENTER;
2258 
2259     CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
2260 
2261     if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2262     {
2263         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
2264             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2265     }
2266 
2267 
2268     if (m_numPipe > 1)
2269     {
2270         m_singleTaskPhaseSupported = m_singleTaskPhaseSupportedInPak = false;
2271     }
2272 
2273     //so far only validate Tiling for VDEnc VP9
2274     uint8_t col = (1 << (m_vp9PicParams->log2_tile_columns));
2275     uint8_t row = (1 << (m_vp9PicParams->log2_tile_rows));
2276 
2277     // Handling invalid tiling and scalability cases. When NumTilingColumn does not match NumPipe fall back to single pipe mode
2278     if (m_numPipe > 1 && (col != m_numPipe))
2279     {
2280         if ((col == 1) || (row == 1))
2281         {
2282             m_numPipe = 1; // number of tile columns cannot be greater than number of pipes (VDBOX), run in single pipe mode
2283             m_scalableMode = false;
2284         }
2285         else
2286         {
2287             CODECHAL_ENCODE_ASSERTMESSAGE("Number of tile columns cannot be greater than number of pipes (VDBOX) when number of rows > 1");
2288             return MOS_STATUS_INVALID_PARAMETER;
2289         }
2290     }
2291 
2292     //num columns must be either 2 or 4 for scalability mode, H/W limitation
2293     if ((m_numPipe > 1) && (m_numPipe != 2) && (m_numPipe != 4))
2294     {
2295         CODECHAL_ENCODE_ASSERTMESSAGE("Num pipes must be either 2 or 4 for scalability mode, H/W limitation");
2296         return MOS_STATUS_INVALID_PARAMETER;
2297     }
2298 
2299     // Tile width needs to be minimum size 256, error out if less
2300     if ((col != 1) && ((m_vp9PicParams->SrcFrameWidthMinus1 + 1) < col * CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH))
2301     {
2302         CODECHAL_ENCODE_ASSERTMESSAGE("Incorrect number of columns input parameter, Tile width is < 256");
2303         return MOS_STATUS_INVALID_PARAMETER;
2304     }
2305 
2306     if (row > 4)
2307     {
2308         CODECHAL_ENCODE_ASSERTMESSAGE("Max number of rows cannot exceeds 4 by VP9 Spec.");
2309         return MOS_STATUS_INVALID_PARAMETER;
2310     }
2311 
2312     // number of tiles for this frame
2313     m_numberTilesInFrame = col * row;
2314     m_numUsedVdbox = m_numPipe;
2315 
2316     if (!m_newSeq)
2317     {
2318         // If there is no new SEQ header, then the number of passes is decided here.
2319         // Otherwise, it is done in SetSequenceStructs. For example, BRC setting may be changed.
2320         m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
2321     }
2322 
2323     //Last place where scalable mode is decided
2324     if(m_frameNum == 0)
2325     {
2326         m_lastFrameScalableMode = m_scalableMode;
2327     }
2328 
2329     return eStatus;
2330 }
2331 
SetGpuCtxCreatOption()2332 MOS_STATUS CodechalVdencVp9StateG12::SetGpuCtxCreatOption()
2333 {
2334     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2335 
2336     CODECHAL_ENCODE_FUNCTION_ENTER;
2337 
2338     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2339     {
2340         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
2341     }
2342     else
2343     {
2344         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
2345         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
2346 
2347         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
2348             m_scalabilityState,
2349             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2350     }
2351 
2352     return eStatus;
2353 }
2354 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)2355 MOS_STATUS CodechalVdencVp9StateG12::SetAndPopulateVEHintParams(
2356     PMOS_COMMAND_BUFFER  cmdBuffer)
2357 {
2358     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
2359 
2360     CODECHAL_ENCODE_FUNCTION_ENTER;
2361 
2362     if (!MOS_VE_SUPPORTED(m_osInterface))
2363     {
2364         return eStatus;
2365     }
2366 
2367     CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
2368     MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
2369 
2370     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2371     {
2372         scalSetParms.bNeedSyncWithPrevious = true;
2373     }
2374 
2375     int32_t currentPass = GetCurrentPass();
2376     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2377     // Scalable mode only
2378     if (m_scalableMode)
2379     {
2380         for (auto i = 0; i < m_numPipe; i++)
2381         {
2382             scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex].OsResource;
2383         }
2384     }
2385 
2386     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
2387 
2388     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2389     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
2390 
2391     return eStatus;
2392 }
2393 
SetTileData()2394 MOS_STATUS CodechalVdencVp9StateG12::SetTileData()
2395 {
2396     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2397 
2398     CODECHAL_ENCODE_FUNCTION_ENTER;
2399 
2400     MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 *tileCodingParams = m_tileParams;
2401 
2402     tileCodingParams->Mode = CODECHAL_ENCODE_MODE_VP9;
2403 
2404     uint32_t numTileRows    = (1 << m_vp9PicParams->log2_tile_rows);
2405     uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2406     uint32_t numTiles = numTileRows * numTileColumns;
2407 
2408     uint32_t bitstreamSizePerTile = m_bitstreamUpperBound / (numTiles * CODECHAL_CACHELINE_SIZE);
2409     uint32_t numLcusInTiles = 0, numCuRecord = 64;
2410     uint32_t cuLevelStreamoutOffset = 0, sliceSizeStreamoutOffset = 0, bitstreamByteOffset = 0, sseRowstoreOffset = 0;
2411 
2412     for (uint32_t tileCntr = 0; tileCntr < numTiles; tileCntr++)
2413     {
2414         uint32_t tileX, tileY, tileStartSbX, tileStartSbY, tileWidthInSb, tileHeightInSb, lastTileColWidth, lastTileRowHeight, numLcuInTile;
2415         bool isLastTileCol, isLastTileRow;
2416 
2417         tileX = tileCntr % numTileColumns;
2418         tileY = tileCntr / numTileColumns;
2419 
2420         isLastTileCol = ((numTileColumns - 1) == tileX);
2421         isLastTileRow = ((numTileRows - 1) == tileY);
2422 
2423         tileStartSbX = (tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns;
2424         tileStartSbY = (tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows;
2425 
2426         tileWidthInSb  = (isLastTileCol ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) - tileStartSbX;
2427         tileHeightInSb = (isLastTileRow ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) - tileStartSbY;
2428 
2429         lastTileColWidth  = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameWidthMinus1 + 1 - tileStartSbX * CODEC_VP9_SUPER_BLOCK_WIDTH), CODEC_VP9_MIN_BLOCK_WIDTH) / CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2430         lastTileRowHeight = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameHeightMinus1 + 1 - tileStartSbY * CODEC_VP9_SUPER_BLOCK_HEIGHT), CODEC_VP9_MIN_BLOCK_HEIGHT) / CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2431 
2432         numLcuInTile = tileWidthInSb * tileHeightInSb;
2433         tileCodingParams[tileCntr].NumberOfActiveBePipes = m_numPipe;
2434         tileCodingParams[tileCntr].NumOfTilesInFrame = numTiles;
2435         tileCodingParams[tileCntr].NumOfTileColumnsInFrame = numTileColumns;
2436         tileCodingParams[tileCntr].TileStartLCUX = tileStartSbX;
2437         tileCodingParams[tileCntr].TileStartLCUY = tileStartSbY;
2438         tileCodingParams[tileCntr].IsLastTileofColumn = isLastTileRow;
2439         tileCodingParams[tileCntr].IsLastTileofRow = isLastTileCol;
2440 
2441         tileCodingParams[tileCntr].TileWidthInMinCbMinus1 = isLastTileCol ? lastTileColWidth : (tileWidthInSb * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
2442         tileCodingParams[tileCntr].TileHeightInMinCbMinus1 = isLastTileRow ? lastTileRowHeight : (tileHeightInSb * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
2443 
2444         if (m_scalableMode)
2445         {
2446             sseRowstoreOffset = (tileStartSbX + (3 * tileX)) << 5;
2447 
2448             tileCodingParams[tileCntr].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * 64),
2449                 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
2450             tileCodingParams[tileCntr].presHcpSyncBuffer = &m_hcpScalabilitySyncBuffer.sResource;
2451             tileCodingParams[tileCntr].SliceSizeStreamoutOffset = sliceSizeStreamoutOffset;
2452             tileCodingParams[tileCntr].SseRowstoreOffset = sseRowstoreOffset;
2453             tileCodingParams[tileCntr].BitstreamByteOffset = bitstreamByteOffset;
2454             tileCodingParams[tileCntr].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
2455 
2456             cuLevelStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2457             sliceSizeStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
2458             sseRowstoreOffset += (numLcuInTile * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
2459             bitstreamByteOffset += bitstreamSizePerTile;
2460             numLcusInTiles += numLcuInTile;
2461 
2462             tileCodingParams[tileCntr].TileSizeStreamoutOffset = (tileCntr*m_hcpInterface->GetPakHWTileSizeRecordSize() + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2463 
2464             //DW5
2465             const uint32_t frameStatsStreamoutSize = m_brcPakStatsBufSize;
2466             tileCodingParams[tileCntr].PakTileStatisticsOffset = (tileCntr*frameStatsStreamoutSize + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
2467 
2468             //DW12
2469             tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = ((tileCntr * m_probabilityCounterBufferSize) + (CODECHAL_CACHELINE_SIZE - 1)) / CODECHAL_CACHELINE_SIZE;
2470         }
2471         else
2472         {
2473             tileCodingParams[tileCntr].CuRecordOffset = 0;
2474             tileCodingParams[tileCntr].presHcpSyncBuffer = nullptr;
2475             tileCodingParams[tileCntr].SliceSizeStreamoutOffset = 0;
2476             tileCodingParams[tileCntr].SseRowstoreOffset = 0;
2477             tileCodingParams[tileCntr].BitstreamByteOffset = 0;
2478             tileCodingParams[tileCntr].CuLevelStreamoutOffset = 0;
2479             tileCodingParams[tileCntr].TileSizeStreamoutOffset = 0;
2480 
2481             //DW5
2482             tileCodingParams[tileCntr].PakTileStatisticsOffset = 0;
2483 
2484             //DW12
2485             tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = 0;
2486         }
2487     }
2488 
2489     return eStatus;
2490 }
2491 
SetTileCommands(PMOS_COMMAND_BUFFER cmdBuffer)2492 MOS_STATUS CodechalVdencVp9StateG12::SetTileCommands(
2493     PMOS_COMMAND_BUFFER cmdBuffer)
2494 {
2495     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2496 
2497     CODECHAL_ENCODE_FUNCTION_ENTER;
2498 
2499     MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G12 vdencWalkerStateParams;
2500     vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_VP9;
2501     vdencWalkerStateParams.pVp9EncPicParams = m_vp9PicParams;
2502     vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2503 
2504     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2505     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2506     // MFXPipeDone should not be set for tail insertion
2507     vdPipelineFlushParams.Flags.bWaitDoneMFX =
2508         (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
2509     vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2510     vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2511     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2512     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2513 
2514     if (IsFirstPipe() && IsFirstPass())
2515     {
2516         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData());
2517     }
2518 
2519     MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
2520     uint32_t                             numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
2521     uint32_t                             numTileRows    = (1 << m_vp9PicParams->log2_tile_rows);
2522     int currentPipe = GetCurrentPipe();
2523     for (uint32_t tileRow = 0, tileIdx = 0; tileRow < numTileRows; tileRow++)
2524     {
2525         for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++, tileIdx++)
2526         {
2527             if (m_numPipe > 1)
2528             {
2529                 if (tileCol != currentPipe)
2530                 {
2531                     continue;
2532                 }
2533             }
2534 
2535             if (m_scalableMode)
2536             {
2537                 MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
2538                 //in scalability mode
2539                 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2540                 vdCtrlParam.scalableModePipeLock = true;
2541                 MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
2542                 CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(cmdBuffer, &vdCtrlParam));
2543             }
2544 
2545             // HCP_TILE_CODING commmand
2546             CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(cmdBuffer, &m_tileParams[tileIdx]));
2547 
2548             MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
2549             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(cmdBuffer, nullptr, &vdencWeightOffsetParams));
2550 
2551             vdencWalkerStateParams.pTileCodingParams = &m_tileParams[tileIdx];
2552             vdencWalkerStateParams.dwTileId = tileIdx;
2553             switch (m_numPipe)
2554             {
2555             case 0:
2556             case 1:
2557                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2558                 break;
2559             case 2:
2560                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
2561                 break;
2562             case 4:
2563                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
2564                 break;
2565             default:
2566                 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
2567                 CODECHAL_ENCODE_ASSERTMESSAGE("Num Pipes invalid");
2568                 return eStatus;
2569                 break;
2570             }
2571             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
2572 
2573             if (m_scalableMode)
2574             {
2575                 MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
2576                 MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2577                 vdCtrlParam.scalableModePipeUnlock = true;
2578                 MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
2579                 CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(cmdBuffer, &vdCtrlParam));
2580             }
2581 
2582             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipelineFlushParams));
2583             // Send MI_FLUSH command
2584             MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2585             MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2586             flushDwParams.bVideoPipelineCacheInvalidate = true;
2587             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2588         }
2589     }
2590 
2591     return eStatus;
2592 }
2593 
StoreNumPasses(EncodeStatusBuffer * encodeStatusBuf,MhwMiInterface * miInterface,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t currPass)2594 MOS_STATUS CodechalVdencVp9StateG12::StoreNumPasses(
2595     EncodeStatusBuffer *encodeStatusBuf,
2596     MhwMiInterface     *miInterface,
2597     PMOS_COMMAND_BUFFER cmdBuffer,
2598     uint32_t            currPass)
2599 {
2600     MHW_MI_STORE_DATA_PARAMS storeDataParams;
2601     uint32_t                 offset;
2602     MOS_STATUS               eStatus = MOS_STATUS_SUCCESS;
2603 
2604     CODECHAL_ENCODE_FUNCTION_ENTER;
2605 
2606     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusBuf);
2607     CODECHAL_ENCODE_CHK_NULL_RETURN(miInterface);
2608     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2609 
2610     offset =
2611         (encodeStatusBuf->wCurrIndex * encodeStatusBuf->dwReportSize) +
2612         encodeStatusBuf->dwNumPassesOffset +  // Num passes offset
2613         sizeof(uint32_t) * 2;                 // pEncodeStatus is offset by 2 DWs in the resource
2614 
2615     storeDataParams.pOsResource      = &encodeStatusBuf->resStatusBuffer;
2616     storeDataParams.dwResourceOffset = offset;
2617     storeDataParams.dwValue          = currPass + 1;
2618     CODECHAL_ENCODE_CHK_STATUS_RETURN(miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
2619 
2620     return MOS_STATUS_SUCCESS;
2621 }
ExecuteTileLevel()2622 MOS_STATUS CodechalVdencVp9StateG12::ExecuteTileLevel()
2623 {
2624     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2625 
2626     CODECHAL_ENCODE_FUNCTION_ENTER;
2627 
2628     int currentPipe = GetCurrentPipe();
2629     int currentPass = GetCurrentPass();
2630 
2631     if (currentPipe < 0 || currentPass < 0)
2632     {
2633         return MOS_STATUS_INVALID_PARAMETER;
2634     }
2635 
2636     MOS_COMMAND_BUFFER cmdBuffer;
2637     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2638 
2639     if (IsFirstPipe())
2640     {
2641         MHW_BATCH_BUFFER secondLevelBatchBuffer;
2642         MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
2643         secondLevelBatchBuffer.dwOffset = 0;
2644         secondLevelBatchBuffer.bSecondLevel = true;
2645 
2646         if (!m_hucEnabled)
2647         {
2648             secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
2649         }
2650         else
2651         {
2652             secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
2653         }
2654         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2655             &cmdBuffer,
2656             &secondLevelBatchBuffer));
2657     }
2658 
2659     // Setup Tile level PAK commands
2660     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileCommands(&cmdBuffer));
2661 
2662     MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
2663     MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2664     vdCtrlParam.memoryImplicitFlush = true;
2665     MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
2666     CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
2667 
2668     // Send VD_PIPELINE_FLUSH command
2669     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS              vdPipelineFlushParams;
2670     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2671     // MFXPipeDone should not be set for tail insertion
2672     vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
2673     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2674     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2675     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2676     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2677 
2678     // Send MI_FLUSH command
2679     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2680     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2681     flushDwParams.bVideoPipelineCacheInvalidate = true;
2682     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2683 
2684     if (IsFirstPipe())
2685     {
2686         if (m_numPipe > 1 && m_enableTileStitchByHW)
2687         {
2688             for (auto i = 1; i < m_numPipe; i++)
2689             {
2690                 if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource) && m_hucEnabled)
2691                 {
2692                     // This semaphore waits for all pipes except pipe 1 vdenc+pak to finish processing before stitching bitstream
2693                     SendHWWaitCommand(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, (currentPass + 1));
2694                     SetSemaphoreMem(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, 0); // Reset above semaphore
2695                 }
2696             }
2697         }
2698         // PAK integration kernel to integrate stats for next HUC pass
2699         if (m_scalableMode && m_hucEnabled && m_isTilingSupported && IsFirstPipe())
2700         {
2701             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9PakInt(&cmdBuffer));
2702             // Signal pak int done semaphore here for next pass to proceed
2703             if (!IsLastPass())
2704             {
2705                 SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, (currentPass + 1));
2706             }
2707 
2708             if (m_enableTileStitchByHW)
2709             {
2710                 // 2nd level BB buffer for stitching cmd
2711                 // current location to add cmds in 2nd level batch buffer
2712                 m_HucStitchCmdBatchBuffer.iCurrent = 0;
2713                 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2714                 m_HucStitchCmdBatchBuffer.dwOffset = 0;
2715                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
2716                 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
2717                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
2718             }
2719         }
2720 
2721         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2722 
2723         if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2724         {
2725             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2726         }
2727     }
2728     else // 2nd Pipe
2729     {
2730         // Signal stitch command to proceed because vdenc+pak is done in this pipe and we can stitch bs
2731         if (m_hucEnabled && m_isTilingSupported && !Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currentPipe].sResource))
2732         {
2733             SetSemaphoreMem(&m_stitchWaitSemaphoreMem[currentPipe].sResource, &cmdBuffer, (currentPass + 1));
2734         }
2735     }
2736 
2737     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2738     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2739     if (m_singleTaskPhaseSupported && m_hucEnabled && IsLastPass())
2740     {
2741         m_lastTaskInPhase = true; //HPU singletask phase mode only
2742     }
2743     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreNumPasses(
2744         &(m_encodeStatusBuf),
2745         m_miInterface,
2746         &cmdBuffer,
2747         m_currPass));
2748 
2749     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase || m_scalableMode)
2750     {
2751         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2752     }
2753 
2754     std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
2755     if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
2756     {
2757         // Added extra symbol into log to avoid log's file overwrite on the next pass
2758         // For DYS Mutlipass mode next pass should run with "m_currPass = 0" again
2759         // See ExecutePictureLevel() function for all details
2760         currPassName.append("_0");
2761     }
2762     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
2763         &cmdBuffer,
2764         CODECHAL_NUM_MEDIA_STATES,
2765         currPassName.data())));
2766 
2767     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2768 
2769     if (IsLastPipe())
2770     {
2771         if (m_hucEnabled)
2772         {
2773             // We save the index of the 2nd level batch buffer in case there is a pass that needs the last SLBB
2774             m_lastVdencPictureState2ndLevelBBIndex = m_vdencPictureState2ndLevelBBIndex;
2775         }
2776         m_vdencPictureState2ndLevelBBIndex = (m_vdencPictureState2ndLevelBBIndex + 1) % CODECHAL_VP9_ENCODE_RECYCLED_BUFFER_NUM;
2777     }
2778 
2779     if (IsFirstPipe() &&
2780         m_waitForEnc &&
2781         IsFirstPass() &&
2782         !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2783     {
2784         MOS_SYNC_PARAMS syncParams;
2785         syncParams = g_cInitSyncParams;
2786         syncParams.GpuContext = m_videoContext;
2787         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2788 
2789         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2790         m_waitForEnc = false;
2791     }
2792 
2793     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2794     {
2795         bool renderFlags = m_videoContextUsesNullHw;
2796 
2797         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
2798         m_lastTaskInPhase = false;
2799 
2800         CODECHAL_DEBUG_TOOL(
2801             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2802                 m_resVdencPakObjCmdStreamOutBuffer,
2803                 CodechalDbgAttr::attrPakObjStreamout,
2804                 currPassName.data(),
2805                 m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE,
2806                 0,
2807                 CODECHAL_NUM_MEDIA_STATES));
2808 
2809             if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
2810                 //CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
2811                 //m_debugInterface->DumpBuffer(
2812                 //    (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
2813                 //    CodechalDbgAttr::attrOutput,
2814                 //    "SegMap_Out",
2815                 //    CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
2816                 //    0,
2817                 //    CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
2818             }
2819 
2820             if (m_mmcState && !m_mmcUserFeatureUpdated) {
2821                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2822                 m_mmcUserFeatureUpdated = true;
2823             });
2824     }
2825 
2826     // Reset parameters for next PAK execution
2827     if (IsLastPipe() && IsLastPass())
2828     {
2829         if (m_vp9PicParams->PicFlags.fields.super_frame && m_tsEnabled)
2830         {
2831             CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructSuperFrame());
2832         }
2833 
2834         if ((currentPipe == 0) &&
2835             m_signalEnc &&
2836             !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
2837         {
2838             // signal semaphore
2839             MOS_SYNC_PARAMS syncParams;
2840             syncParams = g_cInitSyncParams;
2841             syncParams.GpuContext = m_videoContext;
2842             syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;
2843 
2844             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2845             m_semaphoreObjCount++;
2846         }
2847 
2848         m_prevFrameInfo.KeyFrame    = !m_vp9PicParams->PicFlags.fields.frame_type;
2849         m_prevFrameInfo.IntraOnly   = (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME) || m_vp9PicParams->PicFlags.fields.intra_only;
2850         m_prevFrameInfo.ShowFrame   = m_vp9PicParams->PicFlags.fields.show_frame;
2851         m_prevFrameInfo.FrameWidth  = m_oriFrameWidth;
2852         m_prevFrameInfo.FrameHeight = m_oriFrameHeight;
2853         m_currMvTemporalBufferIndex ^= 0x01;
2854         m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx] = m_vp9PicParams->PicFlags.fields.frame_type;
2855         m_prevFrameSegEnabled                                                  = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
2856 
2857         // Reset parameters for next PAK execution
2858         if ((!m_singleTaskPhaseSupported) && (IsLastPass()))
2859         {
2860             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2861         }
2862 
2863         m_newPpsHeader = 0;
2864         m_newSeqHeader = 0;
2865         m_frameNum++;
2866         //Save the last frame's scalable mode flag to prevent switching buffers when doing next pass
2867         m_lastFrameScalableMode = m_scalableMode;
2868     }
2869 
2870     return eStatus;
2871 }
2872 
ExecuteSliceLevel()2873 MOS_STATUS CodechalVdencVp9StateG12::ExecuteSliceLevel()
2874 {
2875     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2876 
2877     CODECHAL_ENCODE_FUNCTION_ENTER;
2878 
2879     return ExecuteTileLevel();
2880 }
2881 
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)2882 void CodechalVdencVp9StateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
2883 {
2884     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2885 
2886     CODECHAL_ENCODE_FUNCTION_ENTER;
2887 
2888     CodechalVdencVp9State::SetHcpPipeModeSelectParams(pipeModeSelectParams);
2889 
2890     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParamsG12 = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(pipeModeSelectParams);
2891 
2892     pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2893     pipeModeSelectParamsG12.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2894     pipeModeSelectParamsG12.bDynamicScalingEnabled = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
2895     if (m_scalableMode)
2896     {
2897         // Running in the multiple VDBOX mode
2898         if (IsFirstPipe())
2899         {
2900             pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2901         }
2902         else
2903         {
2904             if (IsLastPipe())
2905             {
2906                 pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2907             }
2908             else
2909             {
2910                 pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2911             }
2912         }
2913 
2914         pipeModeSelectParamsG12.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2915     }
2916 
2917     return;
2918 }
2919 
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)2920 void CodechalVdencVp9StateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
2921 {
2922     CodechalVdencVp9State::SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2923 
2924     PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBBIndex];
2925     bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
2926 
2927     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
2928     {
2929         // overwrite presProbabilityCounterBuffer and it's params for scalable mode
2930         indObjBaseAddrParams.presProbabilityCounterBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
2931         indObjBaseAddrParams.dwProbabilityCounterOffset = m_tileStatsOffset.counterBuffer;
2932         indObjBaseAddrParams.dwProbabilityCounterSize = m_statsSize.counterBuffer;
2933     }
2934 
2935     // Need to use presPakTileSizeStasBuffer instead of presTileRecordBuffer, so setting to null
2936     indObjBaseAddrParams.presTileRecordBuffer        = nullptr;
2937     indObjBaseAddrParams.dwTileRecordSize            = 0;
2938     indObjBaseAddrParams.presPakTileSizeStasBuffer   = useTileRecordBuffer? &tileRecordBuffer->sResource : nullptr;
2939     indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer? ((m_statsSize.tileSizeRecord) * GetNumTilesInFrame()) : 0;
2940     indObjBaseAddrParams.dwPakTileSizeRecordOffset   = useTileRecordBuffer? m_tileStatsOffset.tileSizeRecord: 0;
2941 }
2942 
VerifyCommandBufferSize()2943 MOS_STATUS CodechalVdencVp9StateG12::VerifyCommandBufferSize()
2944 {
2945     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2946 
2947     CODECHAL_ENCODE_FUNCTION_ENTER;
2948 
2949     if (UseLegacyCommandBuffer())   // legacy mode & resize CommandBuffer Size for every BRC pass
2950     {
2951         if (!m_singleTaskPhaseSupported)
2952         {
2953             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2954         }
2955     }
2956     else    // virtual engine
2957     {
2958         uint32_t requestedSize =
2959             m_pictureStatesSize +
2960             m_picturePatchListSize +
2961             m_extraPictureStatesSize +
2962             (m_sliceStatesSize * m_numSlices);
2963         requestedSize += requestedSize*m_numPassesInOnePipe;
2964         if (m_hucEnabled && m_brcEnabled)
2965         {
2966             requestedSize += m_brcMaxNumPasses*(m_defaultHucCmdsSize + m_defaultHucPatchListSize);
2967         }
2968         // Running in the multiple VDBOX mode
2969         int currentPipe = GetCurrentPipe();
2970         int currentPass = GetCurrentPass();
2971         if (currentPipe < 0 || currentPipe >= m_numPipe)
2972         {
2973             return MOS_STATUS_INVALID_PARAMETER;
2974         }
2975         if (currentPass < 0 || currentPass >= m_brcMaxNumPasses)
2976         {
2977             return MOS_STATUS_INVALID_PARAMETER;
2978         }
2979 
2980         if (IsFirstPipe() && m_osInterface->bUsesPatchList)
2981         {
2982             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
2983         }
2984         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
2985         PMOS_COMMAND_BUFFER cmdBuffer;
2986         if (m_osInterface->phasedSubmission)
2987         {
2988             m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
2989             return eStatus;
2990         }
2991         else
2992         {
2993             cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][(uint32_t)currentPipe][passIndex];
2994         }
2995 
2996         if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
2997             m_sizeOfVEBatchBuffer < requestedSize)
2998         {
2999             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3000 
3001             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3002             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3003             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3004             allocParamsForBufferLinear.Format = Format_Buffer;
3005             allocParamsForBufferLinear.dwBytes = requestedSize;
3006             allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
3007 
3008             if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
3009             {
3010                 if (cmdBuffer->pCmdBase)
3011                 {
3012                     m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
3013                 }
3014                 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
3015             }
3016 
3017             eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
3018                 m_osInterface,
3019                 &allocParamsForBufferLinear,
3020                 &cmdBuffer->OsResource);
3021             CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
3022 
3023             m_sizeOfVEBatchBuffer = requestedSize;
3024         }
3025 
3026         if (cmdBuffer->pCmdBase == 0)
3027         {
3028             MOS_LOCK_PARAMS lockParams;
3029             MOS_ZeroMemory(&lockParams, sizeof(lockParams));
3030             lockParams.WriteOnly = true;
3031             cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
3032             cmdBuffer->iRemaining = m_sizeOfVEBatchBuffer;
3033             cmdBuffer->iOffset = 0;
3034 
3035             if (cmdBuffer->pCmdBase == nullptr)
3036             {
3037                 return MOS_STATUS_NULL_POINTER;
3038             }
3039         }
3040     }
3041 
3042     return eStatus;
3043 }
3044 
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)3045 MOS_STATUS CodechalVdencVp9StateG12::GetCommandBuffer(
3046     PMOS_COMMAND_BUFFER cmdBuffer)
3047 {
3048     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3049 
3050     CODECHAL_ENCODE_FUNCTION_ENTER;
3051 
3052     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3053 
3054     if (UseLegacyCommandBuffer())        // legacy mode
3055     {
3056         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
3057     }
3058     else    // virtual engine
3059     {
3060         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
3061         if (m_osInterface->phasedSubmission)
3062         {
3063             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, GetCurrentPipe() + 1));
3064 
3065             CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
3066             if (IsLastPipe())
3067             {
3068                 cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
3069             }
3070         }
3071         else
3072         {
3073             int currentPipe = GetCurrentPipe();
3074             int currentPass = GetCurrentPass();
3075             if (currentPipe < 0 || currentPipe >= m_numPipe)
3076             {
3077                 return MOS_STATUS_INVALID_PARAMETER;
3078             }
3079             uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
3080             *cmdBuffer = m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex];
3081         }
3082     }
3083 
3084     return eStatus;
3085 }
3086 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)3087 MOS_STATUS CodechalVdencVp9StateG12::ReturnCommandBuffer(
3088     PMOS_COMMAND_BUFFER cmdBuffer)
3089 {
3090     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3091 
3092     CODECHAL_ENCODE_FUNCTION_ENTER;
3093 
3094     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3095 
3096     if (UseLegacyCommandBuffer())        // legacy mode
3097     {
3098         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
3099     }
3100     else    // virtual engine
3101     {
3102         if (m_osInterface->phasedSubmission)
3103         {
3104             m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, GetCurrentPipe() + 1);
3105             m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
3106         }
3107         else
3108         {
3109 
3110             int currentPipe = GetCurrentPipe();
3111             int currentPass = GetCurrentPass();
3112             if (currentPipe < 0 || currentPipe >= m_numPipe)
3113             {
3114                 return MOS_STATUS_INVALID_PARAMETER;
3115             }
3116 
3117             if (eStatus == MOS_STATUS_SUCCESS)
3118             {
3119                 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
3120                 m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex] = *cmdBuffer;
3121                 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
3122             }
3123         }
3124     }
3125 
3126     return eStatus;
3127 }
3128 
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)3129 MOS_STATUS CodechalVdencVp9StateG12::SubmitCommandBuffer(
3130     PMOS_COMMAND_BUFFER cmdBuffer,
3131     bool bNullRendering)
3132 {
3133     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3134 
3135     CODECHAL_ENCODE_FUNCTION_ENTER;
3136 
3137     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3138 
3139     if (UseLegacyCommandBuffer())        // legacy mode
3140     {
3141         if (!IsRenderContext()) // Set VE Hints for video contexts only
3142         {
3143             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
3144         }
3145         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
3146     }
3147     else // virtual engine
3148     {
3149         if (!IsLastPipe())
3150         {
3151             return eStatus;
3152         }
3153 
3154         if (m_osInterface->phasedSubmission)
3155         {
3156             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
3157         }
3158         else
3159         {
3160             int currentPass = GetCurrentPass();
3161             for (auto i = 0; i < m_numPipe; i++)
3162             {
3163                 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
3164                 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex];
3165 
3166                 if (cmdBuffer->pCmdBase)
3167                 {
3168                     m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
3169                 }
3170 
3171                 cmdBuffer->pCmdBase = 0;
3172                 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
3173             }
3174 
3175             if (eStatus == MOS_STATUS_SUCCESS)
3176             {
3177                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
3178                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
3179             }
3180         }
3181     }
3182 
3183     return eStatus;
3184 }
3185 
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)3186 MOS_STATUS CodechalVdencVp9StateG12::SendPrologWithFrameTracking(
3187     PMOS_COMMAND_BUFFER cmdBuffer,
3188     bool frameTrackingRequested,
3189     MHW_MI_MMIOREGISTERS *mmioRegister)
3190 {
3191     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3192 
3193     CODECHAL_ENCODE_FUNCTION_ENTER;
3194 
3195     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
3196 
3197     MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
3198 
3199     if (IsRenderContext())   //Render context only
3200     {
3201         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
3202         return eStatus;
3203     }
3204     else        // Legacy mode or virtual engine
3205     {
3206 #ifdef _MMC_SUPPORTED
3207         CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
3208         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
3209 #endif
3210 
3211         if (!IsLastPipe())
3212         {
3213             return eStatus;
3214         }
3215         PMOS_COMMAND_BUFFER commandBufferInUse;
3216         if (m_realCmdBuffer.pCmdBase)
3217         {
3218             commandBufferInUse = &m_realCmdBuffer; //virtual engine mode
3219         }
3220         else
3221         {
3222             if (cmdBuffer && cmdBuffer->pCmdBase)
3223             {
3224                 commandBufferInUse = cmdBuffer; //legacy mode
3225             }
3226             else
3227             {
3228                 eStatus = MOS_STATUS_INVALID_PARAMETER;
3229                 return eStatus;
3230             }
3231         }
3232 
3233         commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
3234         commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
3235         commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
3236         commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
3237         commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
3238 
3239         if (frameTrackingRequested && m_frameTrackingEnabled)
3240         {
3241             commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
3242             commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
3243                 &m_encodeStatusBuf.resStatusBuffer;
3244             commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
3245             // Set media frame tracking address offset(the offset from the encoder status buffer page)
3246             commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
3247         }
3248 
3249         MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
3250         MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
3251         genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
3252         genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
3253         genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
3254         genericPrologParams.dwStoreDataValue = m_storeData - 1;
3255         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
3256     }
3257 
3258     return eStatus;
3259 }
3260 
SetSemaphoreMem(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)3261 MOS_STATUS CodechalVdencVp9StateG12::SetSemaphoreMem(
3262     PMOS_RESOURCE               semaphoreMem,
3263     PMOS_COMMAND_BUFFER         cmdBuffer,
3264     uint32_t                    value)
3265 {
3266     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3267 
3268     CODECHAL_ENCODE_FUNCTION_ENTER;
3269 
3270     CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
3271     MHW_MI_STORE_DATA_PARAMS storeDataParams;
3272     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3273     storeDataParams.pOsResource = semaphoreMem;
3274     storeDataParams.dwResourceOffset = 0;
3275     storeDataParams.dwValue = value;
3276 
3277     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3278         cmdBuffer,
3279         &storeDataParams));
3280 
3281     return eStatus;
3282 }
3283 
SendHWWaitCommand(PMOS_RESOURCE semaphoreMem,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t value)3284 MOS_STATUS CodechalVdencVp9StateG12::SendHWWaitCommand(
3285     PMOS_RESOURCE               semaphoreMem,
3286     PMOS_COMMAND_BUFFER         cmdBuffer,
3287     uint32_t                    value)
3288 {
3289     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3290 
3291     CODECHAL_ENCODE_FUNCTION_ENTER;
3292     CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
3293 
3294     MHW_MI_SEMAPHORE_WAIT_PARAMS semaphoreWaitParams;
3295     MOS_ZeroMemory(&semaphoreWaitParams, sizeof(semaphoreWaitParams));
3296     semaphoreWaitParams.presSemaphoreMem = semaphoreMem;
3297     semaphoreWaitParams.bPollingWaitMode = true;
3298     semaphoreWaitParams.dwSemaphoreData = value;
3299     semaphoreWaitParams.CompareOperation = MHW_MI_SAD_EQUAL_SDD;
3300     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(cmdBuffer, &semaphoreWaitParams));
3301 
3302     return eStatus;
3303 }
3304 
SetDmemHuCPakInt()3305 MOS_STATUS CodechalVdencVp9StateG12::SetDmemHuCPakInt()
3306 {
3307     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3308 
3309     CODECHAL_ENCODE_FUNCTION_ENTER;
3310 
3311     uint8_t currPass = (uint8_t)GetCurrentPass();
3312 
3313     MOS_LOCK_PARAMS lockFlags;
3314     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3315     lockFlags.WriteOnly = 1;
3316     // All bytes in below dmem for fields not used by VP9 to be set to 0xFF.
3317     HucPakIntDmem* dmem = (HucPakIntDmem*)m_osInterface->pfnLockResource(
3318         m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass], &lockFlags);
3319     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
3320 
3321     MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
3322     // CODECHAL_VDENC_VP9_PAK_INT_DMEM_OFFSETS_SIZE size of offsets in the CODECHAL_VDENC_VP9_HUC_PAK_INT_DMEM struct.
3323     // Reset offsets to 0xFFFFFFFF as unavailable
3324     memset(dmem, 0xFF, m_pakIntDmemOffsetsSize);
3325 
3326     dmem->totalSizeInCommandBuffer = GetNumTilesInFrame() * CODECHAL_CACHELINE_SIZE;
3327     dmem->offsetInCommandBuffer = 0xFFFF; // Not used for VP9, all bytes in dmem for fields not used are 0xFF
3328     dmem->picWidthInPixel = (uint16_t)m_frameWidth;
3329     dmem->picHeightInPixel = (uint16_t)m_frameHeight;
3330     dmem->totalNumberOfPaks = m_numPipe;
3331     dmem->codec = m_pakIntVp9CodecId;
3332     dmem->maxPass = m_brcMaxNumPasses; // Only VDEnc CQP and BRC
3333     dmem->currentPass = currPass + 1;
3334     dmem->lastTileBSStartInBytes = m_tileParams[GetNumTilesInFrame() - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
3335     dmem->picStateStartInBytes = 0xFFFF;
3336 
3337     if (m_enableTileStitchByHW)
3338     {
3339         dmem->StitchEnable = true;
3340         dmem->StitchCommandOffset = 0;
3341         dmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
3342     }
3343 
3344     // Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
3345 
3346     dmem->tileSizeRecordOffset[0] = m_frameStatsOffset.tileSizeRecord;
3347     dmem->vdencStatOffset[0] = m_frameStatsOffset.vdencStats;
3348     dmem->vp9PakStatOffset[0] = m_frameStatsOffset.pakStats;
3349     dmem->vp9CounterBufferOffset[0] = m_frameStatsOffset.counterBuffer;
3350 
3351     //Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
3352     for (auto i = 1; i <= m_numPipe; i++)
3353     {
3354         dmem->numTiles[i - 1] = (GetNumTilesInFrame()) / m_numPipe;
3355         dmem->tileSizeRecordOffset[i] = m_tileStatsOffset.tileSizeRecord + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.tileSizeRecord);
3356         dmem->vdencStatOffset[i] = m_tileStatsOffset.vdencStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.vdencStats);
3357         dmem->vp9PakStatOffset[i] = m_tileStatsOffset.pakStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.pakStats);
3358         dmem->vp9CounterBufferOffset[i] = m_tileStatsOffset.counterBuffer + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.counterBuffer);
3359     }
3360     m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass]);
3361 
3362     return eStatus;
3363 }
3364 
SetSequenceStructs()3365 MOS_STATUS CodechalVdencVp9StateG12::SetSequenceStructs()
3366 {
3367     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3368 
3369     CODECHAL_ENCODE_FUNCTION_ENTER;
3370 
3371 #if defined(LINUX) && !defined(WDDM_LINUX)
3372     MOS_SURFACE rawSurface;
3373     PCODEC_VP9_ENCODE_SEQUENCE_PARAMS seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)m_encodeParams.pSeqParams;
3374     rawSurface = *(m_encodeParams.psRawSurface);
3375 
3376     if (rawSurface.OsResource.Format == Format_A8R8G8B8 ||
3377         rawSurface.OsResource.Format == Format_B10G10R10A2)
3378     {
3379         seqParams->SeqFlags.fields.DisplayFormatSwizzle = 1;
3380     }
3381 #endif
3382 
3383     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetSequenceStructs());
3384 
3385     // All pipe need to go through the picture-level and slice-level commands
3386     m_numPassesInOnePipe = m_numPasses;
3387     m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
3388 
3389     return eStatus;
3390 }
3391 
SetPictureStructs()3392 MOS_STATUS CodechalVdencVp9StateG12::SetPictureStructs()
3393 {
3394     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3395 
3396     CODECHAL_ENCODE_FUNCTION_ENTER;
3397 
3398     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetPictureStructs());
3399 
3400     m_virtualEngineBBIndex = m_currOriginalPic.FrameIdx;
3401     m_picWidthInMinBlk =
3402         MOS_ALIGN_CEIL(m_oriFrameWidth, CODEC_VP9_MIN_BLOCK_WIDTH);
3403     m_picHeightInMinBlk =
3404         MOS_ALIGN_CEIL(m_oriFrameHeight, CODEC_VP9_MIN_BLOCK_WIDTH);
3405 
3406     // When buffers start recycling , we need to know the index of last buffer for next frame.
3407     if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
3408     {
3409         if (!m_hucEnabled)
3410         {
3411             m_numPassesInOnePipe = (m_dysRefFrameFlags != DYS_REF_NONE);
3412         }
3413         if (m_vdencBrcEnabled)
3414         {
3415             //Reduce per pipe passes by 1, as m_numPassesInOnePipe == 1 becomes m_numPassesInOnePipe = 0 for Huc to run
3416             m_dysBrc             = true;
3417             m_numPassesInOnePipe = (m_numPassesInOnePipe > 0 ) ? m_numPassesInOnePipe - 1 : m_numPassesInOnePipe;
3418         }
3419         else
3420         {
3421             m_dysCqp = true;
3422         }
3423         m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
3424     }
3425     // This is BRC DYS SinglePass case
3426     // Actually, repak is disabled
3427     if (m_vdencBrcEnabled && (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
3428     {
3429         m_dysBrc             = true;
3430         m_numPassesInOnePipe = 1;
3431         m_numPasses          = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
3432     }
3433     if (!m_vdencBrcEnabled && (m_dysRefFrameFlags != DYS_REF_NONE))
3434     {
3435         m_dysCqp = true;
3436     }
3437 
3438 #ifdef _MMC_SUPPORTED
3439     //WA to clear CCS by VE resolve
3440     if (MEDIA_IS_WA(m_waTable, Wa_1408785368))
3441     {
3442         bool        clearccswa     = false;
3443         MOS_SURFACE surfaceDetails = {};
3444         m_osInterface->pfnGetResourceInfo(m_osInterface, &m_reconSurface.OsResource, &surfaceDetails);
3445 
3446         // Restore CCS if the surface's width/height is not aligned with that of current frame due to resolution change
3447         if ((m_frameNum != 0) &&
3448             ((surfaceDetails.dwWidth != m_picWidthInMinBlk) ||
3449             (surfaceDetails.dwHeight != m_picHeightInMinBlk)))
3450         {
3451             clearccswa = true;
3452         }
3453 
3454         if (clearccswa && m_mmcState && m_mmcState->IsMmcEnabled())
3455         {
3456             m_osInterface->pfnDecompResource(m_osInterface, &m_reconSurface.OsResource);
3457             m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
3458         }
3459     }
3460 #endif
3461 
3462     return eStatus;
3463 }
3464 
ExecutePictureLevel()3465 MOS_STATUS CodechalVdencVp9StateG12::ExecutePictureLevel()
3466 {
3467     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3468 
3469     CODECHAL_ENCODE_FUNCTION_ENTER;
3470 
3471     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
3472 
3473     PerfTagSetting perfTag;
3474     perfTag.Value = 0;
3475     perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
3476     perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
3477     perfTag.PictureCodingType = m_pictureCodingType;
3478     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
3479 
3480     if ((m_dysRefFrameFlags == DYS_REF_NONE) && m_pakOnlyModeEnabledForLastPass)
3481     {
3482         //This flag sets pak-only mode in slbb for RePak pass. In single-pass mode, this flag should be disabled.
3483         m_vdencPakonlyMultipassEnabled = ((m_numPasses > 0) && (IsLastPass())) ? true : false;
3484     }
3485 
3486     // Scalable Mode header
3487     if (m_scalableMode)
3488     {
3489         MOS_COMMAND_BUFFER cmdBuffer;
3490         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3491 
3492         MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
3493         MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
3494         forceWakeupParams.bMFXPowerWellControl      = true;
3495         forceWakeupParams.bMFXPowerWellControlMask  = true;
3496         forceWakeupParams.bHEVCPowerWellControl     = true;
3497         forceWakeupParams.bHEVCPowerWellControlMask = true;
3498 
3499         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
3500             &cmdBuffer,
3501             &forceWakeupParams));
3502 
3503         bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
3504         // In scalable mode, command buffer header is sent on last pipe only
3505         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3506         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3507     }
3508 
3509     // for VDENC dynamic scaling, here are the steps we need to process
3510     // 1. Use PAK to down scale the reference picture (PASS 0)
3511     // 2. Run VDENC to stream out PakObjCmd (PASS 0)
3512     // 3. Run VDENC (with PAK only multi pass enabled) to stream in PakObjCmd from previous pass (PASS 0)
3513     // 4. Repak (PASS 1) - it is only for CQP mode
3514     // 5. Extra note: Repak is disabled for BRC Dynamic scaling single pass mode
3515     if (m_dysRefFrameFlags != DYS_REF_NONE)
3516     {
3517         if (m_currPass == 0)
3518         {
3519             // Turn off scalability and Tiling for Dynamic scaling pass 0 for reference scaling
3520             uint8_t logTileRows = m_vp9PicParams->log2_tile_rows;
3521             uint8_t logTileColumns = m_vp9PicParams->log2_tile_columns;
3522             bool scalableMode = m_scalableMode;
3523             uint8_t numPipe = m_numPipe;
3524             m_vp9PicParams->log2_tile_rows = 0;
3525             m_vp9PicParams->log2_tile_columns = 0;
3526             m_scalableMode = false;
3527             m_numPipe = 1;
3528             // Execute Reference scaling pass
3529             CODECHAL_ENCODE_CHK_STATUS_RETURN(DysRefFrames());
3530 
3531             // Restore scalability and Tiling status for subsequent passes
3532             m_vp9PicParams->log2_tile_rows = logTileRows;
3533             m_vp9PicParams->log2_tile_columns = logTileColumns;
3534             m_scalableMode = scalableMode;
3535             m_numPipe = numPipe;
3536 
3537             if (m_dysVdencMultiPassEnabled)
3538             {
3539                 m_vdencPakObjCmdStreamOutEnabled = true;
3540                 m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
3541                 // enable single task phase here since we need to combine the pakobj streamout and pakonly pass into one batch buffer
3542                 m_singleTaskPhaseSupported = true;
3543                 m_firstTaskInPhase = true;
3544 
3545                 if (Mos_ResourceIsNull(&m_resVdencDysPictureState2NdLevelBatchBuffer))
3546                 {
3547                     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3548 
3549                     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3550                     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3551                     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3552                     allocParamsForBufferLinear.Format = Format_Buffer;
3553                     allocParamsForBufferLinear.dwBytes = m_vdencPicStateSecondLevelBatchBufferSize;
3554                     allocParamsForBufferLinear.pBufName = "VDEnc DYS Picture Second Level Batch Buffer";
3555 
3556                     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
3557                         m_osInterface,
3558                         &allocParamsForBufferLinear,
3559                         &m_resVdencDysPictureState2NdLevelBatchBuffer);
3560 
3561                     if (eStatus != MOS_STATUS_SUCCESS)
3562                     {
3563                         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate VDEnc DYS Picture Second Level Batch Buffer.");
3564                         return eStatus;
3565                     }
3566                 }
3567 
3568                 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
3569             }
3570             else
3571             {
3572                 m_hucEnabled = m_dysHucEnabled; // recover huc state
3573             }
3574         }
3575         else if (m_currPass == 1 && m_dysVdencMultiPassEnabled)
3576         {
3577             m_hucEnabled = m_dysHucEnabled; // recover huc state
3578             m_vdencPakonlyMultipassEnabled = true;
3579             m_dysRefFrameFlags = DYS_REF_NONE;
3580             m_currPass = 0; // reset ucCurrPass = 0 to run the Huc
3581             m_lastTaskInPhase = false;
3582         }
3583     }
3584     else
3585     {
3586         if (!(IsLastPass()))
3587         {
3588             m_vdencPakObjCmdStreamOutEnabled = true;
3589             m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
3590         }
3591         else
3592         {
3593             m_vdencPakObjCmdStreamOutEnabled = false;
3594         }
3595     }
3596 
3597     if (m_isTilingSupported)
3598     {
3599         MOS_LOCK_PARAMS lockFlagsWriteOnly;
3600         uint8_t* tileStatsData = nullptr;
3601         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3602         lockFlagsWriteOnly.WriteOnly = 1;
3603         if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBBIndex].sResource))
3604         {
3605             // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
3606             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3607             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3608             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3609             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3610             allocParamsForBufferLinear.Format = Format_Buffer;
3611             auto size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
3612             allocParamsForBufferLinear.dwBytes = size;
3613             allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
3614 
3615             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
3616                 m_osInterface,
3617                 &allocParamsForBufferLinear,
3618                 &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource));
3619             m_tileRecordBuffer[m_virtualEngineBBIndex].dwSize = size;
3620             auto tileRecordData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
3621 
3622             MOS_ZeroMemory(tileRecordData, allocParamsForBufferLinear.dwBytes);
3623             m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource);
3624         }
3625     }
3626 
3627     if (m_isTilingSupported && m_scalableMode && m_hucEnabled && IsFirstPipe() && IsFirstPass())
3628     {
3629         // Max row is 4 by VP9 Spec
3630         uint32_t m_maxScalableModeRows = 4;
3631         uint32_t m_maxScalableModeTiles = m_numVdbox * m_maxScalableModeRows;
3632 
3633         // Fill Pak integration kernel input tile stats structure
3634         MOS_ZeroMemory(&m_tileStatsOffset, sizeof(StatsInfo));
3635         // TileSizeRecord has to be 4k aligned
3636         m_tileStatsOffset.tileSizeRecord = 0;
3637         // VdencStats has to be 4k aligned
3638         m_tileStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_tileStatsOffset.tileSizeRecord + (m_maxScalableModeTiles * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
3639         // VP9PAKStats has to be 64 byte aligned
3640         m_tileStatsOffset.pakStats = MOS_ALIGN_CEIL((m_tileStatsOffset.vdencStats + (m_maxScalableModeTiles * m_statsSize.vdencStats)), CODECHAL_PAGE_SIZE);
3641         // VP9CounterBuffer has to be 4k aligned
3642         m_tileStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_tileStatsOffset.pakStats + (m_maxScalableModeTiles * m_statsSize.pakStats)), CODECHAL_PAGE_SIZE);
3643 
3644         MOS_LOCK_PARAMS lockFlagsWriteOnly;
3645         uint8_t* tileStatsData = nullptr;
3646         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3647         lockFlagsWriteOnly.WriteOnly = 1;
3648 
3649         if (Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource))
3650         {
3651             // Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
3652             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3653             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3654             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3655             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3656             allocParamsForBufferLinear.Format = Format_Buffer;
3657             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL((m_tileStatsOffset.counterBuffer + (m_maxScalableModeTiles * m_statsSize.counterBuffer)), CODECHAL_PAGE_SIZE);
3658             allocParamsForBufferLinear.pBufName = "GEN12 Tile Level Statistics Buffer";
3659 
3660             m_tileStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
3661 
3662             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
3663                 m_osInterface,
3664                 &allocParamsForBufferLinear,
3665                 &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource));
3666             m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].dwSize = allocParamsForBufferLinear.dwBytes;
3667 
3668             tileStatsData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
3669 
3670             MOS_ZeroMemory(tileStatsData, allocParamsForBufferLinear.dwBytes);
3671             m_osInterface->pfnUnlockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource);
3672         }
3673     }
3674 
3675     if (IsFirstPass())
3676     {
3677         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
3678     }
3679     int currPass = GetCurrentPass();
3680     if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
3681     {
3682         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencDysPictureState2NdLevelBatchBuffer));
3683     }
3684     else
3685     {
3686         if (IsFirstPipe())
3687         {
3688             CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex]));
3689         }
3690 
3691         if (!m_scalableMode)
3692         {
3693             MOS_COMMAND_BUFFER cmdBuffer;
3694             CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3695             MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
3696             MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
3697             forceWakeupParams.bMFXPowerWellControl      = true;
3698             forceWakeupParams.bMFXPowerWellControlMask  = true;
3699             forceWakeupParams.bHEVCPowerWellControl     = true;
3700             forceWakeupParams.bHEVCPowerWellControlMask = true;
3701             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(&cmdBuffer, &forceWakeupParams));
3702             ReturnCommandBuffer(&cmdBuffer);
3703         }
3704     }
3705 
3706     if (m_dysRefFrameFlags != DYS_REF_NONE)
3707     {
3708         m_brcReset = 1;
3709     }
3710 
3711     if (m_vdencBrcEnabled && IsFirstPipe())
3712     {
3713         // Invoke BRC init/reset FW
3714         if (m_brcInit || m_brcReset)
3715         {
3716             CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
3717             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
3718             m_brcInit = m_brcReset = false;
3719         }
3720         // For multipass and singlepass+RePAK we call BRC update for all passes except last pass (RePAK)
3721         // For single pass w/o RePAK (1 total pass) we call BRC update on one and only pass
3722         if (!IsLastPass() || (m_currPass == 0 && m_numPasses == 0))
3723         {
3724             bool origFrameTrackingHeader = false;
3725             bool origSingleTaskPhase = m_singleTaskPhaseSupported;
3726             // If this is the case of Dynamic Scaling + BRC Pass 0'  VDENC + Pak  pass
3727             // Disable SingleTaskPhase before running 1st BRC update
3728             // To run HPU0 on the next pass i.e Pak only pass, we make Pass 1 as Pass 0 in which case the
3729             // BRC dmem buffer( resVdencBrcUpdateDmemBuffer[0] ) will get overridden if we do not submit BRC command now.
3730             if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
3731             {
3732                 //Reset Frame Tracking Header for this submission
3733                 MOS_COMMAND_BUFFER cmdBuffer;
3734                 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3735                 origFrameTrackingHeader = cmdBuffer.Attributes.bEnableMediaFrameTracking;
3736                 cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
3737                 ReturnCommandBuffer(&cmdBuffer);
3738                 m_singleTaskPhaseSupported = false;
3739             }
3740 
3741             CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
3742             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
3743             //Restore Original Frame Tracking Header
3744             if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
3745             {
3746                 MOS_COMMAND_BUFFER cmdBuffer;
3747                 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3748                 cmdBuffer.Attributes.bEnableMediaFrameTracking = origFrameTrackingHeader;
3749                 ReturnCommandBuffer(&cmdBuffer);
3750             }
3751             //Restore the original state of SingleTaskPhaseSupported flag
3752             m_singleTaskPhaseSupported = origSingleTaskPhase;
3753         }
3754     }
3755 
3756     // run HuC_VP9Prob first pass (it runs in parallel with ENC)
3757     if (m_hucEnabled)
3758     {
3759         if (IsFirstPipe() && (IsFirstPass() || IsLastPass() || m_vdencBrcEnabled))  // Before the first PAK pass and for RePak pass
3760         {
3761             CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
3762             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9Prob());
3763             // restore perf tag to PAK
3764             CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
3765         }
3766     }
3767     else
3768     {
3769         CODECHAL_ENCODE_CHK_STATUS_RETURN(RefreshFrameInternalBuffers());
3770     }
3771 
3772     // set HCP_SURFACE_STATE values
3773     MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
3774     for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3775     {
3776         MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
3777         surfaceParams[i].Mode = m_mode;
3778         surfaceParams[i].ucSurfaceStateId = i;
3779         surfaceParams[i].ChromaType = m_outputChromaFormat;
3780         surfaceParams[i].bSrc8Pak10Mode   = (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth) && (!m_vp9SeqParams->SeqFlags.fields.SourceBitDepth);
3781 
3782         switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
3783         {
3784             case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
3785             {
3786                 surfaceParams[i].ucBitDepthChromaMinus8 = 2;
3787                 surfaceParams[i].ucBitDepthLumaMinus8 = 2;
3788                 break;
3789             }
3790             default:
3791             {
3792                 surfaceParams[i].ucBitDepthChromaMinus8 = 0;
3793                 surfaceParams[i].ucBitDepthLumaMinus8 = 0;
3794                 break;
3795             }
3796         }
3797     }
3798 
3799     // For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
3800     PMOS_SURFACE refSurface[3], refSurfaceNonScaled[3], dsRefSurface4x[3], dsRefSurface8x[3];
3801     for (auto i = 0; i < 3; i++)
3802     {
3803         refSurface[i] = refSurfaceNonScaled[i] = dsRefSurface4x[i] = dsRefSurface8x[i] = nullptr;
3804     }
3805     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpSrcSurfaceParams(surfaceParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
3806 
3807     MOS_COMMAND_BUFFER cmdBuffer;
3808     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
3809 
3810     // Non scalable mode header
3811     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
3812     {
3813         // Send command buffer header at the beginning (OS dependent)
3814         // frame tracking tag is only added in the last command buffer header
3815         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
3816         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
3817     }
3818 
3819     // Place hw semaphore on all other pipe to wait for first pipe HUC to finish.
3820     int currPipe = GetCurrentPipe();
3821     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
3822     {
3823         if (!IsFirstPipe())
3824         {
3825             if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[currPipe].sResource))
3826             {
3827                 // On second pipe, wait here for huc to finish on first pipe
3828                 SendHWWaitCommand(&m_hucDoneSemaphoreMem[currPipe].sResource, &cmdBuffer, (currPass + 1));
3829                 SetSemaphoreMem(&m_hucDoneSemaphoreMem[currPipe].sResource, &cmdBuffer, 0);
3830             }
3831         }
3832     }
3833 
3834     // Repak conditional batch buffer end based on repak flag written by Huc to HUC_STATUS regster
3835     if (m_hucEnabled && (m_numPasses > 0) && IsLastPass())
3836     {
3837         // Insert conditional batch buffer end
3838         // Bit 30 has been added as a success condition, therefore this needs to be masked to only check 31 for RePAK
3839         // or else if HuC decides not to do RePAK for conditional RePAK yet terminates successfully RePAK will still happen.
3840         // Success = bit 30 set to 1, Do RePAK = bit 31 set to 1, value is always 0; if 0 < memory, continue
3841         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
3842         MOS_ZeroMemory(
3843             &miConditionalBatchBufferEndParams,
3844             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
3845 
3846         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
3847             &m_resHucPakMmioBuffer;
3848         // Make the DisableCompareMask 0, so that the HW will do AND operation on DW0 with Mask DW1, refer to HuCVp9Prob() for the settings
3849         // and compare the result against the Semaphore data which in our case dwValue = 0.
3850         // If result > dwValue then continue execution otherwise terminate the batch buffer
3851         miConditionalBatchBufferEndParams.bDisableCompareMask = false;
3852 
3853         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
3854             &cmdBuffer,
3855             &miConditionalBatchBufferEndParams));
3856     }
3857 
3858     if (IsFirstPipe())
3859     {
3860         CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
3861     }
3862 
3863     // Send VDENC_CONTROL_STATE Pipe Initialization
3864     MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS vdencControlStateParams;
3865     {
3866         MOS_ZeroMemory(&vdencControlStateParams, sizeof(MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS));
3867         vdencControlStateParams.bVdencInitialization = true;
3868         CODECHAL_ENCODE_CHK_STATUS_RETURN(
3869             static_cast<MhwVdboxVdencInterfaceG12X *>(m_vdencInterface)->AddVdencControlStateCmd(&cmdBuffer, &vdencControlStateParams));
3870     }
3871 
3872     //Send VD_CONTROL_STATE Pipe Initialization
3873     MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
3874     MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3875     vdCtrlParam.initialization = true;
3876     MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
3877     CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
3878 
3879     // set HCP_PIPE_MODE_SELECT values
3880     PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
3881     pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
3882     CODECHAL_ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
3883 
3884     auto release_func = [&]()
3885     {
3886         m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3887         pipeModeSelectParams = nullptr;
3888     };
3889 
3890     SetHcpPipeModeSelectParams(*pipeModeSelectParams);
3891     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), release_func);
3892 
3893     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, false), release_func);
3894 
3895     // Decoded picture
3896 #ifdef _MMC_SUPPORTED
3897     CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, release_func);
3898     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), release_func);
3899 #endif
3900     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), release_func);
3901 
3902     // Source input
3903 #ifdef _MMC_SUPPORTED
3904     CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, release_func);
3905     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), release_func);
3906 #endif
3907     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), release_func);
3908 
3909     if (MEDIA_IS_WA(m_waTable, Wa_Vp9UnalignedHeight))
3910     {
3911         uint32_t real_height = m_oriFrameHeight;
3912         uint32_t aligned_height = MOS_ALIGN_CEIL(real_height, CODEC_VP9_MIN_BLOCK_HEIGHT);
3913 
3914         fill_pad_with_value(m_rawSurfaceToPak, real_height, aligned_height);
3915     }
3916 
3917     if (m_pictureCodingType != I_TYPE)
3918     {
3919 #ifdef _MMC_SUPPORTED
3920         //Get each reference surface state and be recorded by skipMask if current surface state is mmc disabled
3921         //In VP9 mode, Bit 8is (here is bit0 in skipMask ) for Previous Reference;
3922         //Bit 9is (here is bit1 in skipMask ) for Golden Reference and Bit 10is (here is bit2 in skipMask ) for Alterante Reference;
3923         //Bits11-15are unused and should be programmed to 0 (skipped)
3924         uint8_t skipMask = 0xf8;
3925         for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3926         {
3927             CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, release_func);
3928             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[i]), release_func);
3929             if (surfaceParams[i].mmcState == MOS_MEMCOMP_DISABLED)
3930             {
3931                 skipMask |= (1 << (i - 2));
3932             }
3933         }
3934         CODECHAL_ENCODE_NORMALMESSAGE("MMC skip mask is %d\n", skipMask);
3935         for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3936         {
3937             //Set each ref surface state as MOS_MEMCOMP_MC to satisfy MmcEnable in AddHcpSurfaceCmd
3938             //Because each ref surface state should be programmed as the same
3939             //The actual mmc state is recorded by skipMask and set each ref surface too
3940             surfaceParams[i].mmcState = MOS_MEMCOMP_MC;
3941             surfaceParams[i].mmcSkipMask = skipMask;
3942         }
3943 #endif
3944         for (uint8_t i = CODECHAL_HCP_LAST_SURFACE_ID; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
3945         {
3946             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[i]), release_func);
3947         }
3948     }
3949 
3950     // set HCP_PIPE_BUF_ADDR_STATE values
3951     PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
3952     pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
3953 
3954     auto delete_func = [&]()
3955     {
3956         if (pipeModeSelectParams)
3957         {
3958             m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3959             pipeModeSelectParams = nullptr;
3960         }
3961         if (pipeBufAddrParams)
3962         {
3963             MOS_Delete(pipeBufAddrParams);
3964             pipeBufAddrParams = nullptr;
3965         }
3966     };
3967 
3968     if (pipeBufAddrParams)
3969     {
3970         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SetHcpPipeBufAddrParams(*pipeBufAddrParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x), delete_func);
3971 #ifdef _MMC_SUPPORTED
3972         CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
3973         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
3974 #endif
3975         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
3976     }
3977 
3978     // set HCP_IND_OBJ_BASE_ADDR_STATE values
3979     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
3980     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
3981     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams), delete_func);
3982 
3983     // Send VD_CONTROL_STATE Pipe Initialization
3984     MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3985     vdCtrlParam.vdencEnabled = true;
3986     vdCtrlParam.vdencInitialization = true;
3987     miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
3988     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam), delete_func);
3989 
3990     // Change ref surfaces to scaled for VDENC for DYS
3991     if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
3992     {
3993         surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].psSurface = refSurface[0];
3994         surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].psSurface = refSurface[1];
3995         surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].psSurface = refSurface[2];
3996     }
3997 
3998     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), delete_func);
3999     if (pipeModeSelectParams)
4000     {
4001         MOS_Delete(pipeModeSelectParams);
4002         pipeModeSelectParams = nullptr;
4003     }
4004 
4005     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]), delete_func);
4006     if (m_pictureCodingType == I_TYPE)
4007     {
4008         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]), delete_func);
4009     }
4010     else
4011     {
4012         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]), delete_func);
4013         if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4014         {
4015             if (m_refFrameFlags & 0x02)
4016             {
4017                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]), delete_func);
4018             }
4019             if (m_refFrameFlags & 0x04)
4020             {
4021                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]), delete_func);
4022             }
4023         }
4024     }
4025 
4026     MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];     // 8x and 4x DS surfaces
4027     SetHcpDsSurfaceParams(&dsSurfaceParams[0]);
4028     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2), delete_func);
4029 
4030     if (pipeBufAddrParams)
4031     {
4032         pipeBufAddrParams->presVdencTileRowStoreBuffer = &m_vdencTileRowStoreBuffer;
4033         pipeBufAddrParams->presVdencCumulativeCuCountStreamoutSurface = &m_vdencCumulativeCuCountStreamoutSurface;
4034         pipeBufAddrParams->bDynamicScalingEnable = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
4035         pipeBufAddrParams->pRawSurfParam                              = &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID];
4036         pipeBufAddrParams->pDecodedReconParam                         = &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID];
4037         pipeBufAddrParams->isIFrame                                   = (m_vp9PicParams->PicFlags.fields.frame_type == 0);
4038 
4039 #ifdef _MMC_SUPPORTED
4040         CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_mmcState, delete_func);
4041         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams), delete_func);
4042 #endif
4043         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams), delete_func);
4044         MOS_Delete(pipeBufAddrParams);
4045         pipeBufAddrParams = nullptr;
4046     }
4047 
4048     MHW_BATCH_BUFFER secondLevelBatchBuffer;
4049     MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
4050     secondLevelBatchBuffer.dwOffset = 0;
4051     secondLevelBatchBuffer.bSecondLevel = true;
4052     if (m_hucEnabled)
4053     {
4054         secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
4055     }
4056     else
4057     {
4058         if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
4059         {
4060             secondLevelBatchBuffer.OsResource = m_resVdencDysPictureState2NdLevelBatchBuffer;
4061         }
4062         else
4063         {
4064             secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
4065         }
4066     }
4067 
4068     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
4069         &cmdBuffer,
4070         &secondLevelBatchBuffer));
4071 
4072     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
4073 
4074     return eStatus;
4075 }
4076 
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams,PMOS_SURFACE * refSurface,PMOS_SURFACE * refSurfaceNonScaled,PMOS_SURFACE * dsRefSurface4x,PMOS_SURFACE * dsRefSurface8x)4077 MOS_STATUS CodechalVdencVp9StateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams,
4078     PMOS_SURFACE* refSurface,
4079     PMOS_SURFACE* refSurfaceNonScaled,
4080     PMOS_SURFACE* dsRefSurface4x,
4081     PMOS_SURFACE* dsRefSurface8x)
4082 {
4083     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4084 
4085     CODECHAL_ENCODE_FUNCTION_ENTER;
4086 
4087     pipeBufAddrParams = {};
4088     pipeBufAddrParams.Mode = m_mode;
4089     pipeBufAddrParams.psPreDeblockSurface = &m_reconSurface;
4090     pipeBufAddrParams.psPostDeblockSurface = &m_reconSurface;
4091     pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
4092 
4093     pipeBufAddrParams.presMfdDeblockingFilterRowStoreScratchBuffer =
4094         &m_resDeblockingFilterLineBuffer;
4095 
4096     pipeBufAddrParams.presDeblockingFilterTileRowStoreScratchBuffer =
4097         &m_resDeblockingFilterTileLineBuffer;
4098 
4099     pipeBufAddrParams.presDeblockingFilterColumnRowStoreScratchBuffer =
4100         &m_resDeblockingFilterTileColumnBuffer;
4101 
4102     pipeBufAddrParams.presMetadataLineBuffer       = &m_resMetadataLineBuffer;
4103     pipeBufAddrParams.presMetadataTileLineBuffer   = &m_resMetadataTileLineBuffer;
4104     pipeBufAddrParams.presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
4105     pipeBufAddrParams.presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
4106     pipeBufAddrParams.bDynamicScalingEnable = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
4107 
4108     if (m_mmcState && m_mmcState->IsMmcEnabled() && m_reconSurface.bCompressible)
4109     {
4110         pipeBufAddrParams.PreDeblockSurfMmcState = MOS_MEMCOMP_HORIZONTAL;
4111         pipeBufAddrParams.PostDeblockSurfMmcState = pipeBufAddrParams.PreDeblockSurfMmcState;
4112     }
4113     else
4114     {
4115         pipeBufAddrParams.PreDeblockSurfMmcState = MOS_MEMCOMP_DISABLED;
4116     }
4117 
4118     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
4119     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(&pipeBufAddrParams));
4120 
4121     // Huc first pass doesn't write probabilities to output prob region but only updates to the input region. HuC run before repak writes to the ouput region.
4122     uint8_t frameCtxIdx = 0;
4123     if (m_hucEnabled && IsLastPass())
4124     {
4125         pipeBufAddrParams.presVp9ProbBuffer = &m_resHucProbOutputBuffer;
4126     }
4127     else
4128     {
4129         frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
4130         CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
4131         pipeBufAddrParams.presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
4132     }
4133 
4134     pipeBufAddrParams.presVp9SegmentIdBuffer              = &m_resSegmentIdBuffer;
4135     pipeBufAddrParams.presHvdTileRowStoreBuffer           = &m_resHvcTileRowstoreBuffer;
4136     pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4137     pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
4138     pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = &m_resVdencIntraRowStoreScratchBuffer;
4139     pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1           = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
4140 
4141     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4142     {
4143         pipeBufAddrParams.presVdencStreamOutBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
4144         pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;
4145     }
4146     else
4147     {
4148         pipeBufAddrParams.presVdencStreamOutBuffer = &m_resVdencBrcStatsBuffer;
4149         pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
4150     }
4151 
4152     pipeBufAddrParams.presStreamOutBuffer = nullptr;
4153 
4154     if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
4155     {
4156         PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex];
4157         bool useTileStatisticsBuffer = tileStatisticsBuffer && !Mos_ResourceIsNull(&tileStatisticsBuffer->sResource);
4158         // the new framestats streamout will now be the tile level stats buffer because each pak is spewing out tile level stats
4159         pipeBufAddrParams.presFrameStatStreamOutBuffer = useTileStatisticsBuffer ? &tileStatisticsBuffer->sResource : nullptr;
4160         pipeBufAddrParams.dwFrameStatStreamOutOffset = useTileStatisticsBuffer ? m_tileStatsOffset.pakStats : 0;
4161         //Main Frame Stats are integrated by PAK integration kernel
4162     }
4163     else
4164     {
4165         pipeBufAddrParams.presFrameStatStreamOutBuffer        = &m_resFrameStatStreamOutBuffer;
4166         pipeBufAddrParams.dwFrameStatStreamOutOffset = 0;
4167     }
4168 
4169     pipeBufAddrParams.presSseSrcPixelRowStoreBuffer       = &m_resSseSrcPixelRowStoreBuffer;
4170     pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
4171     pipeBufAddrParams.presSegmentMapStreamOut             = &m_resVdencSegmentMapStreamOut;
4172     pipeBufAddrParams.presPakCuLevelStreamoutBuffer =
4173         Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;
4174     if (m_dysRefFrameFlags != DYS_REF_NONE)
4175     {
4176         pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer =
4177             (m_vdencPakObjCmdStreamOutEnabled) ? m_resVdencPakObjCmdStreamOutBuffer : nullptr;
4178     }
4179     else
4180     {
4181         pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
4182     }
4183 
4184     if (m_pictureCodingType != I_TYPE)
4185     {
4186         for (auto i = 0; i < 3; i++)
4187         {
4188             CODECHAL_ENCODE_CHK_NULL_RETURN(refSurface[i]);
4189             CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface4x[i]);
4190             CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface8x[i]);
4191             if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4192             {
4193                 pipeBufAddrParams.presReferences[i] = &refSurfaceNonScaled[i]->OsResource;
4194                 pipeBufAddrParams.presReferences[i+4] = &refSurfaceNonScaled[i]->OsResource;
4195             }
4196             else
4197             {
4198                 pipeBufAddrParams.presReferences[i] = &refSurface[i]->OsResource;
4199             }
4200             pipeBufAddrParams.presVdencReferences[i] = &refSurface[i]->OsResource;
4201             pipeBufAddrParams.presVdenc4xDsSurface[i] = &dsRefSurface4x[i]->OsResource;
4202             pipeBufAddrParams.presVdenc8xDsSurface[i] = &dsRefSurface8x[i]->OsResource;
4203         }
4204         if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
4205         {
4206             pipeBufAddrParams.psFwdRefSurface0 = refSurface[0];
4207             pipeBufAddrParams.psFwdRefSurface1 = refSurface[1];
4208             pipeBufAddrParams.psFwdRefSurface2 = refSurface[2];
4209         }
4210 
4211         pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
4212     }
4213 
4214     return eStatus;
4215 }
4216 
GetNumTilesInFrame()4217 uint16_t CodechalVdencVp9StateG12::GetNumTilesInFrame()
4218 {
4219     return ((1 << m_vp9PicParams->log2_tile_rows) * (1 << m_vp9PicParams->log2_tile_columns));
4220 }
4221 
AllocateResources()4222 MOS_STATUS CodechalVdencVp9StateG12::AllocateResources()
4223 {
4224     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4225 
4226     CODECHAL_ENCODE_FUNCTION_ENTER;
4227 
4228     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::AllocateResources());
4229 
4230     // create the tile coding state parameters
4231     CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams =
4232                                         (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12) * m_maxTileNumber));
4233 
4234     if (m_isTilingSupported)
4235     {
4236 
4237         // VDENC tile row store buffer
4238         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4239         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4240         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4241         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4242         allocParamsForBufferLinear.Format = Format_Buffer;
4243         allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
4244         allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
4245 
4246         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
4247             m_osInterface,
4248             &allocParamsForBufferLinear,
4249             &m_vdencTileRowStoreBuffer),
4250             "Failed to allocate VDENC Tile Row Store Buffer");
4251 
4252         uint32_t maxPicWidthInSb = MOS_ROUNDUP_DIVIDE(m_maxPicWidth, CODEC_VP9_SUPER_BLOCK_WIDTH);
4253         uint32_t maxPicHeightInSb = MOS_ROUNDUP_DIVIDE(m_maxPicHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT);
4254 
4255         //PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
4256         uint32_t size = maxPicWidthInSb * maxPicHeightInSb * 64 * CODECHAL_CACHELINE_SIZE; // One CU has 16-byte, and there are 64 CU in one SB. But, each tile needs to be aliged to the cache line
4257         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4258         allocParamsForBufferLinear.dwBytes = size;
4259         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4260         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4261         allocParamsForBufferLinear.Format = Format_Buffer;
4262         allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
4263 
4264         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4265             m_osInterface,
4266             &allocParamsForBufferLinear,
4267             &m_resPakcuLevelStreamoutData.sResource);
4268         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4269 
4270         //PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
4271         // one LCU has one cache line. Use CU as LCU during creation
4272         allocParamsForBufferLinear.dwBytes = size;
4273         allocParamsForBufferLinear.pBufName = "PAK Slice Level Streamout Data";
4274 
4275         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4276             m_osInterface,
4277             &allocParamsForBufferLinear,
4278             &m_resPakSliceLevelStreamutData.sResource);
4279         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4280 
4281         //HCP scalability Sync buffer
4282         size = CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
4283         allocParamsForBufferLinear.dwBytes = size;
4284         allocParamsForBufferLinear.pBufName = "Hcp scalability Sync buffer ";
4285 
4286         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4287             m_osInterface,
4288             &allocParamsForBufferLinear,
4289             &m_hcpScalabilitySyncBuffer.sResource);
4290         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4291         m_hcpScalabilitySyncBuffer.dwSize = size;
4292 
4293         //HCP Tile Size Streamout Buffer. Use in HCP_IND_OBJ_CMD
4294         size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
4295         allocParamsForBufferLinear.dwBytes = size;
4296         allocParamsForBufferLinear.pBufName = "HCP Tile Record Buffer";
4297 
4298         if (m_scalableMode && m_hucEnabled)
4299         {
4300             //Sizes of each buffer to be loaded into the region 0 as input and 1 loaded out as output.
4301 
4302             MOS_ZeroMemory(&m_statsSize, sizeof(StatsInfo));
4303             m_statsSize.tileSizeRecord = m_hcpInterface->GetPakHWTileSizeRecordSize();
4304             m_statsSize.vdencStats = m_brcStatsBufSize;  // VDEnc stats size
4305             m_statsSize.pakStats = m_brcPakStatsBufSize; // Frame stats size
4306             m_statsSize.counterBuffer = m_probabilityCounterBufferSize;
4307 
4308             // HUC Pak Int DMEM buffer
4309             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
4310             allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
4311             for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4312             {
4313                 for (auto j = 0; j < m_brcMaxNumPasses; j++)
4314                 {
4315                     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4316                         m_osInterface,
4317                         &allocParamsForBufferLinear,
4318                         &m_hucPakIntDmemBuffer[i][j]);
4319                     CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4320                 }
4321             }
4322 
4323             // HuC PAK Int Region 1 programming related stats
4324             MOS_ZeroMemory(&m_frameStatsOffset, sizeof(StatsInfo));
4325             m_frameStatsOffset.tileSizeRecord = 0;
4326             m_frameStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_frameStatsOffset.tileSizeRecord + (m_maxTileNumber * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
4327             m_frameStatsOffset.pakStats = MOS_ALIGN_CEIL((m_frameStatsOffset.vdencStats + m_statsSize.vdencStats), CODECHAL_PAGE_SIZE);
4328             m_frameStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_frameStatsOffset.pakStats + m_statsSize.pakStats), CODECHAL_PAGE_SIZE);
4329 
4330             // HuC PAK Int DMEM region 1 buffer allocation
4331             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_frameStatsOffset.counterBuffer + m_statsSize.counterBuffer, CODECHAL_PAGE_SIZE);
4332             allocParamsForBufferLinear.pBufName = "PAK HUC Integrated Frame Stats Buffer";
4333             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4334             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4335             allocParamsForBufferLinear.Format = Format_Buffer;
4336 
4337             m_frameStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
4338 
4339             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4340                 m_osInterface,
4341                 &allocParamsForBufferLinear,
4342                 &m_frameStatsPakIntegrationBuffer.sResource));
4343             m_frameStatsPakIntegrationBuffer.dwSize = allocParamsForBufferLinear.dwBytes;
4344 
4345             MOS_LOCK_PARAMS lockFlags;
4346             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4347             lockFlags.WriteOnly = 1;
4348             uint8_t* data = nullptr;
4349 
4350             data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource, &lockFlags);
4351             MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4352             m_osInterface->pfnUnlockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource);
4353 
4354             // HuC PAK Int region 7, 8
4355             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
4356             allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
4357 
4358             eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
4359                 m_osInterface,
4360                 &allocParamsForBufferLinear,
4361                 &m_hucPakIntDummyBuffer);
4362             CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4363 
4364             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4365             lockFlags.WriteOnly = 1;
4366 
4367             data = (uint8_t*)m_osInterface->pfnLockResource(
4368                 m_osInterface,
4369                 &m_hucPakIntDummyBuffer,
4370                 &lockFlags);
4371 
4372             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4373             MOS_ZeroMemory(
4374                 data,
4375                 allocParamsForBufferLinear.dwBytes);
4376             m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDummyBuffer);
4377 
4378             // Allocate region 9 of pak integration to be fed as input to HUC BRC region 7
4379             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4380             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4381             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4382             allocParamsForBufferLinear.Format = Format_Buffer;
4383             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
4384             allocParamsForBufferLinear.pBufName = "GEN12 PAK Integration FrameByteCount output";
4385             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
4386                 m_osInterface,
4387                 &allocParamsForBufferLinear,
4388                 &m_hucPakIntBrcDataBuffer));
4389 
4390             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4391             lockFlags.WriteOnly = 1;
4392             data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_hucPakIntBrcDataBuffer, &lockFlags);
4393             MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
4394             m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntBrcDataBuffer);
4395 
4396             // Allocate Semaphore memory for HUC to signal other pipe VDENC/PAK to continue
4397             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4398             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4399             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4400             allocParamsForBufferLinear.Format = Format_Buffer;
4401             allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4402             allocParamsForBufferLinear.pBufName = "GEN12 HUC done Semaphore Memory";
4403 
4404             for (auto i = 0; i < m_numPipe; i++)
4405             {
4406                 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4407                     m_osInterface,
4408                     &allocParamsForBufferLinear,
4409                     &m_hucDoneSemaphoreMem[i].sResource));
4410                 m_hucDoneSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
4411             }
4412 
4413             // Allocate Semaphore memory for VDEnc/PAK on all pipes to signal stitch command to stop waiting
4414             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4415             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4416             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4417             allocParamsForBufferLinear.Format = Format_Buffer;
4418             allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4419             allocParamsForBufferLinear.pBufName = "GEN12 VDEnc PAK done Semaphore Memory";
4420 
4421             for (auto i = 0; i < m_numPipe; i++)
4422             {
4423                 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4424                     m_osInterface,
4425                     &allocParamsForBufferLinear,
4426                     &m_stitchWaitSemaphoreMem[i].sResource));
4427                 m_stitchWaitSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
4428             }
4429 
4430             // Allocate semaphore memory for HUC HPU or BRC to wait on previous pass' PAK Integration command to finish
4431             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4432             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4433             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4434             allocParamsForBufferLinear.Format = Format_Buffer;
4435             allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
4436             allocParamsForBufferLinear.pBufName = "GEN12 VDEnc PAK Int done Semaphore Memory";
4437 
4438             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
4439                 m_osInterface,
4440                 &allocParamsForBufferLinear,
4441                 &m_pakIntDoneSemaphoreMem.sResource));
4442             m_pakIntDoneSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
4443         }
4444     }
4445 
4446     if (m_enableTileStitchByHW)
4447     {
4448         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
4449         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4450         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
4451         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
4452         allocParamsForBufferLinear.Format = Format_Buffer;
4453 
4454         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4455         {
4456             for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; j++)
4457             {
4458                 // HuC stitching Data buffer
4459                 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
4460                 allocParamsForBufferLinear.pBufName = "VP9 HuC Stitch Data Buffer";
4461                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4462                     m_osInterface->pfnAllocateResource(
4463                         m_osInterface,
4464                         &allocParamsForBufferLinear,
4465                         &m_resHucStitchDataBuffer[i][j]));
4466                 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4467                 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4468                 lockFlagsWriteOnly.WriteOnly = 1;
4469                 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
4470                     m_osInterface,
4471                     &m_resHucStitchDataBuffer[i][j],
4472                     &lockFlagsWriteOnly);
4473                 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
4474                 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
4475                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
4476             }
4477         }
4478         //Second level BB for huc stitching cmd
4479         MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
4480         m_HucStitchCmdBatchBuffer.bSecondLevel = true;
4481         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
4482             m_osInterface,
4483             &m_HucStitchCmdBatchBuffer,
4484             nullptr,
4485             m_hwInterface->m_HucStitchCmdBatchBufferSize));
4486     }
4487 
4488     uint32_t aligned_width = MOS_ALIGN_CEIL(m_frameWidth, 64);
4489     uint32_t aligned_height = MOS_ALIGN_CEIL(m_frameHeight, 64);
4490     uint32_t num_lcu = (aligned_width * aligned_height) / (64 * 64);
4491 
4492     MOS_ALLOC_GFXRES_PARAMS allocParamsForSurface;
4493     MOS_ZeroMemory(&allocParamsForSurface, sizeof(MOS_ALLOC_GFXRES_PARAMS));
4494     allocParamsForSurface.Type = MOS_GFXRES_BUFFER;
4495     allocParamsForSurface.TileType = MOS_TILE_LINEAR;
4496     allocParamsForSurface.Format = Format_Buffer;
4497     allocParamsForSurface.dwBytes = num_lcu * 4;
4498     allocParamsForSurface.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
4499 
4500     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
4501         m_osInterface,
4502         &allocParamsForSurface,
4503         &m_vdencCumulativeCuCountStreamoutSurface),
4504         "Failed to allocate VDEnc Cumulative CU Count Streamout Surface");
4505 
4506     return eStatus;
4507 }
4508 
FreeResources()4509 void CodechalVdencVp9StateG12::FreeResources()
4510 {
4511     CodechalVdencVp9State::FreeResources();
4512 
4513     MOS_FreeMemory(m_tileParams);
4514     if (m_isTilingSupported)
4515     {
4516         if (!Mos_ResourceIsNull(&m_vdencTileRowStoreBuffer))
4517         {
4518             m_osInterface->pfnFreeResource(
4519                 m_osInterface,
4520                 &m_vdencTileRowStoreBuffer);
4521         }
4522 
4523         if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
4524         {
4525             m_osInterface->pfnFreeResource(
4526                 m_osInterface,
4527                 &m_resPakcuLevelStreamoutData.sResource);
4528         }
4529 
4530         if (!Mos_ResourceIsNull(&m_resPakSliceLevelStreamutData.sResource))
4531         {
4532             m_osInterface->pfnFreeResource(
4533                 m_osInterface,
4534                 &m_resPakSliceLevelStreamutData.sResource);
4535         }
4536 
4537         // Release Hcp scalability Sync buffer
4538         if (!Mos_ResourceIsNull(&m_hcpScalabilitySyncBuffer.sResource))
4539         {
4540             m_osInterface->pfnFreeResource(
4541                 m_osInterface,
4542                 &m_hcpScalabilitySyncBuffer.sResource);
4543         }
4544 
4545         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
4546         {
4547             if (!Mos_ResourceIsNull(&m_tileRecordBuffer[i].sResource))
4548             {
4549                 m_osInterface->pfnFreeResource(
4550                     m_osInterface,
4551                     &m_tileRecordBuffer[i].sResource);
4552             }
4553         }
4554 
4555         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileStatsPakIntegrationBuffer); i++)
4556         {
4557             if (!Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[i].sResource))
4558             {
4559                 m_osInterface->pfnFreeResource(
4560                     m_osInterface,
4561                     &m_tileStatsPakIntegrationBuffer[i].sResource);
4562             }
4563         }
4564 
4565         if (!Mos_ResourceIsNull(&m_frameStatsPakIntegrationBuffer.sResource))
4566         {
4567             m_osInterface->pfnFreeResource(
4568                 m_osInterface,
4569                 &m_frameStatsPakIntegrationBuffer.sResource);
4570         }
4571 
4572         if (!Mos_ResourceIsNull(&m_hucPakIntBrcDataBuffer))
4573         {
4574             m_osInterface->pfnFreeResource(
4575                 m_osInterface,
4576                 &m_hucPakIntBrcDataBuffer);
4577         }
4578 
4579         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4580         {
4581             for (auto j = 0; j < m_brcMaxNumPasses; j++)
4582             {
4583                 if (!Mos_ResourceIsNull(&m_hucPakIntDmemBuffer[i][j]))
4584                 {
4585                     m_osInterface->pfnFreeResource(
4586                         m_osInterface,
4587                         &m_hucPakIntDmemBuffer[i][j]);
4588                 }
4589             }
4590         }
4591 
4592         if (!Mos_ResourceIsNull(&m_hucPakIntDummyBuffer))
4593         {
4594             m_osInterface->pfnFreeResource(
4595                 m_osInterface,
4596                 &m_hucPakIntDummyBuffer);
4597         }
4598 
4599         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_stitchWaitSemaphoreMem); i++)
4600         {
4601             if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource))
4602             {
4603                 m_osInterface->pfnFreeResource(
4604                     m_osInterface,
4605                     &m_stitchWaitSemaphoreMem[i].sResource);
4606             }
4607         }
4608 
4609         if (!Mos_ResourceIsNull(&m_pakIntDoneSemaphoreMem.sResource))
4610         {
4611             m_osInterface->pfnFreeResource(
4612                 m_osInterface,
4613                 &m_pakIntDoneSemaphoreMem.sResource);
4614         }
4615 
4616         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_hucDoneSemaphoreMem); i++)
4617         {
4618             if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[i].sResource))
4619             {
4620                 m_osInterface->pfnFreeResource(
4621                     m_osInterface,
4622                     &m_hucDoneSemaphoreMem[i].sResource);
4623             }
4624         }
4625 
4626         for (auto i = 0; i < m_numUncompressedSurface; i++)
4627         {
4628             for (auto j = 0; j < CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE; j++)
4629             {
4630                 for (auto k = 0; k < 3; k++)
4631                 {
4632                     PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
4633 
4634                     if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
4635                     {
4636                         if (cmdBuffer->pCmdBase)
4637                         {
4638                             m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
4639                         }
4640                         m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
4641                     }
4642                 }
4643             }
4644         }
4645     }
4646 
4647     if (m_enableTileStitchByHW)
4648     {
4649         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
4650         {
4651             for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; j++)
4652             {
4653                 // HuC stitching Data buffer
4654                 m_osInterface->pfnFreeResource(
4655                     m_osInterface,
4656                     &m_resHucStitchDataBuffer[i][j]);
4657             }
4658         }
4659         //Second level BB for huc stitching cmd
4660         Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
4661     }
4662 
4663     if (!Mos_ResourceIsNull(&m_vdencCumulativeCuCountStreamoutSurface))
4664     {
4665         m_osInterface->pfnFreeResource(
4666             m_osInterface,
4667             &m_vdencCumulativeCuCountStreamoutSurface);
4668     }
4669 
4670     return;
4671 }
4672 
SetRowstoreCachingOffsets()4673 MOS_STATUS CodechalVdencVp9StateG12::SetRowstoreCachingOffsets()
4674 {
4675     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4676     // Add row store cache support for VDENC Gen12.
4677     if (m_hwInterface->GetHcpInterface()->IsRowStoreCachingSupported())
4678     {
4679         //add row store cache support.
4680         MHW_VDBOX_ROWSTORE_PARAMS rowstoreParams;
4681         rowstoreParams.Mode             = m_mode;
4682         rowstoreParams.dwPicWidth       = m_frameWidth;
4683         rowstoreParams.ucChromaFormat   = ToHCPChromaFormat(m_chromaFormat);
4684         rowstoreParams.ucBitDepthMinus8 = m_bitDepth * 2;  // 0(8bit) -> 0, 1(10bit)->2, 2(12bit)->4
4685         m_hwInterface->SetRowstoreCachingOffsets(&rowstoreParams);
4686     }
4687     return eStatus;
4688 }
4689 
Initialize(CodechalSetting * settings)4690 MOS_STATUS CodechalVdencVp9StateG12::Initialize(CodechalSetting * settings)
4691 {
4692     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4693     uint32_t   maxRows = 1;
4694 
4695     CODECHAL_ENCODE_FUNCTION_ENTER;
4696 
4697     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::Initialize(settings));
4698 
4699     GetSystemPipeNumberCommon();
4700 
4701     if (MOS_VE_SUPPORTED(m_osInterface))
4702     {
4703         m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
4704         CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
4705         //scalability initialize
4706         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
4707     }
4708 
4709     m_adaptiveRepakSupported = true;
4710     //This flag enables pak-only mode for RePak pass
4711     m_pakOnlyModeEnabledForLastPass = true;
4712 
4713     maxRows = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT;
4714     //Max num of rows = 4 by VP9 Spec
4715     maxRows = MOS_MIN(maxRows, 4);
4716 
4717     //Max tile numbers = max of number tiles for single pipe or max muber of tiles for scalable pipes
4718     m_maxTileNumber = MOS_MAX((MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH), m_numVdbox) * maxRows;
4719 
4720     m_numPipe = m_numVdbox;
4721 
4722     m_scalableMode = (m_numPipe > 1);
4723 
4724     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRowstoreCachingOffsets());
4725 
4726     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
4727 
4728     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4729     MOS_STATUS eStatusKey = MOS_UserFeature_ReadValue_ID(
4730         nullptr,
4731         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH,
4732         &userFeatureData,
4733         m_osInterface->pOsContext);
4734     m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
4735 
4736     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4737     userFeatureData.i32Data = 1;
4738     userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
4739     MOS_UserFeature_ReadValue_ID(
4740         nullptr,
4741         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_HUC_ENABLE_ID,
4742         &userFeatureData,
4743         m_osInterface->pOsContext);
4744     m_hucEnabled = (userFeatureData.i32Data) ? true : false;
4745 
4746     //Enable single pass dynamic scaling by default
4747     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4748     userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
4749     userFeatureData.i32Data = 1;
4750     MOS_UserFeature_ReadValue_ID(
4751         nullptr,
4752         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_SINGLE_PASS_DYS_ENABLE_ID,
4753         &userFeatureData,
4754         m_osInterface->pOsContext);
4755     m_dysVdencMultiPassEnabled = (userFeatureData.i32Data) ? false : true;
4756     m_singlePassDys = !m_dysVdencMultiPassEnabled;
4757 
4758     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4759     userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
4760     userFeatureData.i32Data = 1;
4761     MOS_UserFeature_ReadValue_ID(
4762         nullptr,
4763         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
4764         &userFeatureData,
4765         m_osInterface->pOsContext);
4766     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
4767     m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
4768     // For dynamic scaling, the SingleTaskPhaseSupported is set to true and it does not get restored
4769     // to the original value after encoding of the frame. So need to restore to the original state
4770     m_storeSingleTaskPhaseSupported = m_singleTaskPhaseSupported; //Save the SingleTaskPhase state here
4771 
4772    // Multi-Pass BRC: currently disabled by default
4773     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4774     MOS_UserFeature_ReadValue_ID(
4775         nullptr,
4776         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_MULTIPASS_BRC_ENABLE_ID,
4777         &userFeatureData,
4778         m_osInterface->pOsContext);
4779     m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
4780 
4781     m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
4782     m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
4783     m_brcHistoryBufferSize = m_brcHistoryBufSize;
4784     // HME enabled by default for VP9
4785     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4786     MOS_UserFeature_ReadValue_ID(
4787         NULL,
4788         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ME_ENABLE_ID,
4789         &userFeatureData,
4790         m_osInterface->pOsContext);
4791     m_hmeSupported = (userFeatureData.i32Data) ? true : false;
4792 
4793     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4794     MOS_UserFeature_ReadValue_ID(
4795         NULL,
4796         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_16xME_ENABLE_ID,
4797         &userFeatureData,
4798         m_osInterface->pOsContext);
4799     m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
4800 
4801     // disable superHME when HME is disabled
4802     if (m_hmeSupported == false)
4803     {
4804         m_16xMeSupported = false;
4805     }
4806 
4807     // UHME disabled
4808     m_32xMeSupported = false;
4809 
4810     // VP9 uses a different streamin kernel
4811     m_useNonLegacyStreamin = true;
4812 
4813     // Initialize kernel State
4814     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStates());
4815 
4816     // Get max binding table count
4817     m_maxBtCount = GetMaxBtCount();    // Need to add the correct BTcount when HME is enabled
4818 
4819 #if (_DEBUG || _RELEASE_INTERNAL)
4820     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4821     MOS_UserFeature_ReadValue_ID(
4822         nullptr,
4823         __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_BRC_DLL,
4824         &userFeatureData,
4825         m_osInterface->pOsContext);
4826 
4827     if (userFeatureData.i32Data)
4828     {
4829         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4830         MOS_UserFeature_ReadValue_ID(
4831             nullptr,
4832             __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_BRC_DLL_CUSTOMPATH,
4833             &userFeatureData,
4834             m_osInterface->pOsContext);
4835 
4836         if (!userFeatureData.i32Data)
4837         {
4838             CODECHAL_ENCODE_CHK_STATUS_RETURN(MosUtilities::MosLoadLibrary(VP9SWBRCLIB, &m_swBrcMode));  // Load Dependency (use on RS1)
4839         }
4840         else
4841         {
4842             char path_buffer[MAXPATH];
4843             MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
4844             MOS_ZeroMemory(path_buffer, MAXPATH);
4845             userFeatureData.StringData.pStringData = path_buffer;
4846             MOS_UserFeature_ReadValue_ID(
4847                 nullptr,
4848                 __MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_BRC_DLL_PATH,
4849                 &userFeatureData,
4850                 m_osInterface->pOsContext);
4851             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnLoadLibrary(m_osInterface, path_buffer, &m_swBrcMode));
4852         }
4853     }
4854 #endif  // (_DEBUG || _RELEASE_INTERNAL)
4855 
4856     return eStatus;
4857 }
4858 
4859 /*----------------------------------------------------------------------------
4860 | Name      : GetSegmentBlockIndexInFrame
4861 | Purpose   : Returns the offset of 32x32 block in the frame based on current x,y 32 block location in current tile
4862 |
4863 | Returns   : MOS_STATUS
4864 \---------------------------------------------------------------------------*/
GetSegmentBlockIndexInFrame(uint32_t frameWidth,uint32_t curr32XInTile,uint32_t curr32YInTile,uint32_t currTileStartY64aligned,uint32_t currTileStartX64aligned)4865 uint32_t CodechalVdencVp9StateG12::GetSegmentBlockIndexInFrame(
4866     uint32_t frameWidth,
4867     uint32_t curr32XInTile,
4868     uint32_t curr32YInTile,
4869     uint32_t currTileStartY64aligned,
4870     uint32_t currTileStartX64aligned)
4871 {
4872     uint32_t frameWidthIn32 = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
4873     uint32_t curr32XInFrame = currTileStartX64aligned / 32 + curr32XInTile;
4874     uint32_t curr32YInFrame = currTileStartY64aligned / 32 + curr32YInTile;
4875     uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
4876     return curr32BlockInFrame;
4877 }
4878 
4879 /*----------------------------------------------------------------------------
4880 | Name      : InitZigZagToRasterLUTPerTile
4881 | Purpose   : Rasterize a tile's 32 blocks' segmap indices, add to frame mapbuffer created for these indices
4882 |
4883 | Returns   : MOS_STATUS
4884 \---------------------------------------------------------------------------*/
InitZigZagToRasterLUTPerTile(uint32_t tileHeight,uint32_t tileWidth,uint32_t currTileStartYInFrame,uint32_t currTileStartXInFrame)4885 MOS_STATUS CodechalVdencVp9StateG12::InitZigZagToRasterLUTPerTile(
4886     uint32_t tileHeight,
4887     uint32_t tileWidth,
4888     uint32_t currTileStartYInFrame,
4889     uint32_t currTileStartXInFrame)
4890 {
4891     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4892 
4893     // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned))
4894     // We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
4895     // We keep this map around until sequence is finished, it's deleted at device destruction.
4896     if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
4897     {
4898         if (m_mapBuffer) // free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
4899         {
4900             MOS_FreeMemory(m_mapBuffer);
4901         }
4902         // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index.
4903         m_mapBuffer = (uint32_t*)MOS_AllocAndZeroMemory(
4904             (MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
4905             (MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
4906             sizeof(int32_t)); //Framewidth and height are 64 aligned already
4907     }
4908     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mapBuffer);
4909 
4910     uint32_t align64Width32 = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
4911     uint32_t align64Height32 = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
4912     uint32_t* mapBufferZigZagPerTile = (uint32_t*)MOS_AllocAndZeroMemory(align64Width32*align64Height32 * sizeof(uint32_t));
4913     CODECHAL_ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
4914 
4915     m_segStreamInHeight = m_frameHeight;
4916     m_segStreamInWidth = m_frameWidth;
4917 
4918     uint32_t dwCount32 = 0; //Number of 32 by 32 blocks that will be processed here
4919     for (uint32_t curr32YInTile = 0; curr32YInTile< align64Height32; curr32YInTile++)
4920     {
4921         for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
4922         {
4923             mapBufferZigZagPerTile[dwCount32++] = GetSegmentBlockIndexInFrame(
4924                 m_frameWidth,
4925                 curr32XInTile,
4926                 curr32YInTile,
4927                 currTileStartYInFrame,
4928                 currTileStartXInFrame);
4929         }
4930     }
4931 
4932     //    mapBufferZigZagPerTile --->   m_mapBuffer
4933     //  | a b c d ...               ---> | a b W X c d Y Z ....
4934     //  | W X Y Z ...
4935     uint32_t num32blocks = align64Width32 * align64Height32;
4936     uint32_t tileOffsetIndex = m_32BlocksRasterized;
4937     for (uint32_t i = 0, dwRasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
4938     {
4939         for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
4940         {
4941             m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
4942             m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
4943         }
4944         for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
4945         {
4946             m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
4947             m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
4948         }
4949     }
4950     if (mapBufferZigZagPerTile) // free per tile map buffer as it has been rasterized and copied into the mapbuffer
4951     {
4952         MOS_FreeMemory(mapBufferZigZagPerTile);
4953     }
4954 
4955     // ^ Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
4956     uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
4957     if (width32 != align64Width32) // replicate last column
4958     {
4959         for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
4960         {
4961             m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 1];
4962             m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
4963         }
4964     }
4965 
4966     uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
4967     if (height32 != align64Height32) // replicate last row
4968     {
4969         for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
4970         {
4971             m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 2];
4972             m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
4973         }
4974     }
4975     //Index offset to be added to the buffer for the next tile depending on how many blocks were rasterized already in this tile
4976     m_32BlocksRasterized += dwCount32;
4977 
4978     return eStatus;
4979 }
4980 
CalculateVdencPictureStateCommandSize()4981 MOS_STATUS CodechalVdencVp9StateG12::CalculateVdencPictureStateCommandSize()
4982 {
4983     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4984 
4985     CODECHAL_ENCODE_FUNCTION_ENTER;
4986 
4987     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
4988     uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
4989     stateCmdSizeParams.bHucDummyStream = true;
4990     m_hwInterface->GetHxxStateCommandSize(
4991         CODECHAL_ENCODE_MODE_VP9,
4992         &vdencPictureStatesSize,
4993         &vdencPicturePatchListSize,
4994         &stateCmdSizeParams);
4995 
4996     m_defaultPictureStatesSize += vdencPictureStatesSize;
4997     m_defaultPicturePatchListSize += vdencPicturePatchListSize;
4998 
4999     m_hwInterface->GetVdencStateCommandsDataSize(
5000         CODECHAL_ENCODE_MODE_VP9,
5001         &vdencPictureStatesSize,
5002         &vdencPicturePatchListSize);
5003 
5004     m_defaultPictureStatesSize += vdencPictureStatesSize;
5005     m_defaultPicturePatchListSize += vdencPicturePatchListSize;
5006 
5007     return eStatus;
5008 }
5009 
CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)5010 PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS CodechalVdencVp9StateG12::CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)
5011 {
5012     pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
5013 
5014     return pipeBufAddrParams;
5015 }
5016 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)5017 MOS_STATUS CodechalVdencVp9StateG12::UpdateCmdBufAttribute(
5018     PMOS_COMMAND_BUFFER cmdBuffer,
5019     bool                renderEngineInUse)
5020 {
5021     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5022 
5023     // should not be there. Will remove it in the next change
5024     CODECHAL_ENCODE_FUNCTION_ENTER;
5025     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
5026     {
5027         PMOS_CMD_BUF_ATTRI_VE attriExt =
5028             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
5029 
5030         memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
5031         attriExt->bUseVirtualEngineHint =
5032             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
5033     }
5034 
5035     return eStatus;
5036 }
5037 
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)5038 MOS_STATUS CodechalVdencVp9StateG12::AddMediaVfeCmd(
5039     PMOS_COMMAND_BUFFER cmdBuffer,
5040     SendKernelCmdsParams *params)
5041 {
5042     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
5043 
5044     MHW_VFE_PARAMS_G12 vfeParams = {};
5045     vfeParams.pKernelState              = params->pKernelState;
5046     vfeParams.eVfeSliceDisable          = MHW_VFE_SLICE_ALL;
5047     vfeParams.dwMaximumNumberofThreads  = m_encodeVfeMaxThreads;
5048     vfeParams.bFusedEuDispatch          = false; // legacy mode
5049 
5050     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
5051 
5052     return MOS_STATUS_SUCCESS;
5053 }
5054 
HuCVp9PakInt(PMOS_COMMAND_BUFFER cmdBuffer)5055 MOS_STATUS CodechalVdencVp9StateG12::HuCVp9PakInt(
5056     PMOS_COMMAND_BUFFER cmdBuffer)
5057 {
5058     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5059 
5060     CODECHAL_ENCODE_FUNCTION_ENTER;
5061 
5062     if (!IsFirstPipe())
5063     {
5064         return eStatus;
5065     }
5066 
5067     CODECHAL_DEBUG_TOOL(
5068     uint32_t    hucRegionSize[16] = { 0 };
5069     const char* hucRegionName[16] = { "\0" };
5070 
5071     hucRegionName[0] = "_MultiPakStreamout_input";
5072     hucRegionSize[0] = m_tileStatsPakIntegrationBufferSize;
5073     hucRegionName[1] = "_IntegratedStreamout_output";
5074     hucRegionSize[1] = m_frameStatsPakIntegrationBufferSize;
5075     hucRegionName[4] = "_HCPPICSTATEInputDummy";
5076     hucRegionSize[4] = sizeof(m_hucPakIntDummyBuffer);
5077     hucRegionName[5] = "_HCPPICSTATEInputDummy";
5078     hucRegionSize[5] = sizeof(m_hucPakIntDummyBuffer);
5079     hucRegionName[6] = "_HCPPICSTATEInputDummy";
5080     hucRegionSize[6] = sizeof(m_hucPakIntDummyBuffer);
5081     hucRegionName[7] = "_HCPPICSTATEInputDummy";
5082     hucRegionSize[7] = sizeof(m_hucPakIntDummyBuffer);
5083     hucRegionName[8] = "_HucStitchDataBuffer";
5084     hucRegionSize[8] = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
5085     hucRegionName[9] = "_BrcDataOutputBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
5086     hucRegionSize[9] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
5087     hucRegionName[15] = "_TileRecordBuffer";
5088     hucRegionSize[15] = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
5089     )
5090 
5091     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5092     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5093     imemParams.dwKernelDescriptor = m_vdboxHucPakIntegrationKernelDescriptor;
5094     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(cmdBuffer, &imemParams));
5095 
5096     // pipe mode select
5097     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5098     pipeModeSelectParams.Mode = m_mode;
5099     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
5100 
5101     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakInt());
5102 
5103     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5104     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5105     dmemParams.presHucDataSource = &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()];
5106     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
5107     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5108     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
5109 
5110     if (m_enableTileStitchByHW)
5111     {
5112         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
5113     }
5114 
5115     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5116     MOS_ZeroMemory(&virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
5117     virtualAddrParams.regionParams[0].presRegion = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;             // Region 0 - Tile based input statistics from PAK/ VDEnc
5118     virtualAddrParams.regionParams[0].dwOffset = 0;
5119     virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;      // Region 1 - HuC Frame statistics output
5120     virtualAddrParams.regionParams[1].isWritable = true;
5121     virtualAddrParams.regionParams[4].presRegion = &m_hucPakIntDummyBuffer;             // Region 4 - Not used for VP9
5122     virtualAddrParams.regionParams[5].presRegion = &m_hucPakIntDummyBuffer;             // Region 5 - Not used for VP9
5123     virtualAddrParams.regionParams[5].isWritable = true;
5124     virtualAddrParams.regionParams[6].presRegion = &m_hucPakIntDummyBuffer;              // Region 6 - Not used for VP9
5125     virtualAddrParams.regionParams[6].isWritable = true;
5126     virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntDummyBuffer;             // Region 7 - Not used for VP9
5127     if (m_enableTileStitchByHW)
5128     {
5129         virtualAddrParams.regionParams[8].presRegion  = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][GetCurrentPass()];  // Region 8 - data buffer read by HUC for stitching cmd generation
5130         virtualAddrParams.regionParams[8].isWritable = true;
5131     }
5132     virtualAddrParams.regionParams[9].presRegion = &m_hucPakIntBrcDataBuffer;              // Region 9 - HuC outputs BRC data
5133     virtualAddrParams.regionParams[9].isWritable = true;
5134     if (m_enableTileStitchByHW)
5135     {
5136         virtualAddrParams.regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;                         // Region 10 - SLB for stitching cmd output from Huc
5137         virtualAddrParams.regionParams[10].isWritable = true;
5138     }
5139     virtualAddrParams.regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource;          // Region 15 [In/Out] - Tile Record Buffer
5140     virtualAddrParams.regionParams[15].dwOffset   = 0;
5141 
5142     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
5143 
5144     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
5145 
5146     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(cmdBuffer, true));
5147 
5148     // wait Huc completion (use HEVC bit for now)
5149     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5150     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5151     vdPipeFlushParams.Flags.bFlushHEVC = 1;
5152     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5153     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
5154 
5155     // Flush the engine to ensure memory written out
5156     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5157     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5158     flushDwParams.bVideoPipelineCacheInvalidate = true;
5159     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
5160 
5161     auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
5162     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
5163     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
5164 
5165     CODECHAL_DEBUG_TOOL(
5166     // Dump input Pak Integration buffers before running HuC
5167     m_debugInterface->DumpHucRegion(
5168         virtualAddrParams.regionParams[0].presRegion,
5169         0,
5170         hucRegionSize[0],
5171         0,
5172         "_PakIntStitchBuffer",
5173         (virtualAddrParams.regionParams[0].isWritable ? true : false),
5174         GetCurrentPass(),
5175         CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
5176 
5177     m_debugInterface->DumpHucDmem(
5178         &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()],
5179         sizeof(HucPakIntDmem),
5180         GetCurrentPass(),
5181         CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
5182 
5183     for (auto i = 0; i < 16; i++)
5184     {
5185         if (virtualAddrParams.regionParams[i].presRegion)
5186         {
5187             if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
5188             {
5189                 continue;
5190             }
5191             m_debugInterface->DumpHucRegion(
5192                 virtualAddrParams.regionParams[i].presRegion,
5193                 virtualAddrParams.regionParams[i].dwOffset,
5194                 hucRegionSize[i],
5195                 i,
5196                 hucRegionName[i],
5197                 !virtualAddrParams.regionParams[i].isWritable,
5198                 GetCurrentPass(),
5199                 CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
5200         }
5201     }
5202     )
5203 
5204     return eStatus;
5205 }
5206 
ConstructPicStateBatchBuf(PMOS_RESOURCE picStateBuffer)5207 MOS_STATUS CodechalVdencVp9StateG12::ConstructPicStateBatchBuf(
5208     PMOS_RESOURCE picStateBuffer)
5209 {
5210     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5211 
5212     CODECHAL_ENCODE_FUNCTION_ENTER;
5213 
5214     CODECHAL_ENCODE_CHK_NULL_RETURN(picStateBuffer);
5215 
5216     MOS_COMMAND_BUFFER cmdBuffer;
5217     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5218 
5219     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
5220     {
5221         // Send command buffer header at the beginning (OS dependent)
5222         bool requestFrameTracking = false;
5223         if (!m_vp9PicParams->PicFlags.fields.super_frame) {
5224             requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
5225         }
5226         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5227         m_firstTaskInPhase = false;
5228     }
5229 
5230     ReturnCommandBuffer(&cmdBuffer);
5231 
5232     MOS_LOCK_PARAMS lockFlagsWriteOnly;
5233     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
5234     lockFlagsWriteOnly.WriteOnly = 1;
5235     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, picStateBuffer, &lockFlagsWriteOnly);
5236     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
5237 
5238     MOS_COMMAND_BUFFER constructedCmdBuf;
5239     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
5240     constructedCmdBuf.pCmdBase = (uint32_t *)data;
5241     constructedCmdBuf.pCmdPtr = (uint32_t *)data;
5242     constructedCmdBuf.iOffset = 0;
5243     constructedCmdBuf.iRemaining = m_vdencPicStateSecondLevelBatchBufferSize;
5244 
5245     eStatus = AddCommandsVp9(CODECHAL_CMD1, &constructedCmdBuf);
5246     if (eStatus != MOS_STATUS_SUCCESS)
5247     {
5248         m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5249         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add CODECHAL_CMD1 command.");
5250         return eStatus;
5251     }
5252 
5253     // HCP_VP9_PIC_STATE
5254     MHW_VDBOX_VP9_ENCODE_PIC_STATE picState;
5255     MOS_ZeroMemory(&picState, sizeof(picState));
5256     picState.pVp9PicParams                    = m_vp9PicParams;
5257     picState.pVp9SeqParams                    = m_vp9SeqParams;
5258     picState.ppVp9RefList                     = &(m_refList[0]);
5259     picState.PrevFrameParams.fields.KeyFrame  = m_prevFrameInfo.KeyFrame;
5260     picState.PrevFrameParams.fields.IntraOnly = m_prevFrameInfo.IntraOnly;
5261     picState.PrevFrameParams.fields.Display   = m_prevFrameInfo.ShowFrame;
5262     picState.dwPrevFrmWidth                   = m_prevFrameInfo.FrameWidth;
5263     picState.dwPrevFrmHeight                  = m_prevFrameInfo.FrameHeight;
5264     picState.ucTxMode = m_txMode;
5265     picState.bSSEEnable = m_vdencBrcEnabled;
5266     picState.bUseDysRefSurface = (m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled;
5267     picState.bVdencPakOnlyPassFlag = m_vdencPakonlyMultipassEnabled;
5268     picState.uiMaxBitRate                     = m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
5269     picState.uiMinBitRate                     = m_vp9SeqParams->MinBitRate * CODECHAL_ENCODE_BRC_KBPS;
5270     m_hucPicStateOffset = (uint16_t)constructedCmdBuf.iOffset;
5271 
5272     eStatus = m_hcpInterface->AddHcpVp9PicStateEncCmd(&constructedCmdBuf, nullptr, &picState);
5273     if (eStatus != MOS_STATUS_SUCCESS)
5274     {
5275         m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5276         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add HCP_VP9_PIC_STATE command.");
5277         return eStatus;
5278     }
5279 
5280     // HCP_VP9_SEGMENT_STATE
5281     MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
5282     MOS_ZeroMemory(&segmentState, sizeof(segmentState));
5283     segmentState.Mode = m_mode;
5284     segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
5285     uint8_t segmentCount                 = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
5286 
5287     for (uint8_t i = 0; i < segmentCount; i++)
5288     {
5289         segmentState.ucCurrentSegmentId = i;
5290         eStatus = m_hcpInterface->AddHcpVp9SegmentStateCmd(&constructedCmdBuf, nullptr, &segmentState);
5291         if (eStatus != MOS_STATUS_SUCCESS)
5292         {
5293             m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5294             CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MHW_VDBOX_VP9_SEGMENT_STATE command.");
5295             return eStatus;
5296         }
5297     }
5298 
5299     // Adjust cmd buffer offset to have 8 segment state blocks
5300     if (segmentCount < CODEC_VP9_MAX_SEGMENTS)
5301     {
5302         // Max 7 segments, 32 bytes each
5303         uint8_t zeroBlock[m_segmentStateBlockSize * (CODEC_VP9_MAX_SEGMENTS - 1)];
5304         MOS_ZeroMemory(zeroBlock, sizeof(zeroBlock));
5305         Mhw_AddCommandCmdOrBB(m_osInterface, &constructedCmdBuf, nullptr, zeroBlock, (CODEC_VP9_MAX_SEGMENTS - segmentCount) * m_segmentStateBlockSize);
5306     }
5307 
5308     m_slbbImgStateOffset = (uint16_t)constructedCmdBuf.iOffset;
5309     eStatus = AddCommandsVp9(CODECHAL_CMD2, &constructedCmdBuf);
5310     if (eStatus != MOS_STATUS_SUCCESS)
5311     {
5312         m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5313         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add CODECHAL_CMD2 command.");
5314         return eStatus;
5315     }
5316 
5317     // BB_END
5318     eStatus = m_miInterface->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr);
5319     if (eStatus != MOS_STATUS_SUCCESS)
5320     {
5321         m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5322         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MI Batch Buffer End command.");
5323         return eStatus;
5324     }
5325     m_hucSlbbSize = (uint16_t)constructedCmdBuf.iOffset;
5326 
5327     m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
5328 
5329     return eStatus;
5330 }
5331 
HuCVp9Prob()5332 MOS_STATUS CodechalVdencVp9StateG12::HuCVp9Prob()
5333 {
5334     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5335 
5336     CODECHAL_ENCODE_FUNCTION_ENTER;
5337     if (!IsFirstPipe())
5338     {
5339         return eStatus;
5340     }
5341 
5342     CODECHAL_DEBUG_TOOL(
5343         uint32_t hucRegionSize[16] = { 0 };
5344     const char* hucRegionName[16] = { "\0" };
5345 
5346     hucRegionName[0] = "_UpdatedProbBuffer";   // hucRegionName[0] is used to dump region 0 after HuC is run, which has updated probabilities. Input Region 0 is dumped separetely before HuC.
5347     hucRegionSize[0] = 32 * CODECHAL_CACHELINE_SIZE;
5348     hucRegionName[1] = "_CountersBuffer";
5349     hucRegionSize[1] = 193 * CODECHAL_CACHELINE_SIZE;
5350     hucRegionName[2] = "_ProbBuffer";
5351     hucRegionSize[2] = 32 * CODECHAL_CACHELINE_SIZE;
5352     hucRegionName[3] = "_ProbDeltaBuffer";
5353     hucRegionSize[3] = 29 * CODECHAL_CACHELINE_SIZE;
5354     hucRegionName[4] = "_UncompressedHdr";
5355     hucRegionSize[4] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
5356     hucRegionName[5] = "_CompressedHdr";
5357     hucRegionSize[5] = 32 * CODECHAL_CACHELINE_SIZE;
5358     hucRegionName[6] = "_SecondLevelBatchBuffer";
5359     hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
5360     hucRegionName[7] = "_SecondLevelBatchBuffer";
5361     hucRegionSize[7] = m_vdencPicStateSecondLevelBatchBufferSize;
5362     hucRegionName[8] = "_UncompressedHdr";
5363     hucRegionSize[8] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
5364     hucRegionName[9] = "_DefaultProbs";
5365     hucRegionSize[9] = sizeof(Keyframe_Default_Probs) + sizeof(Inter_Default_Probs);
5366     hucRegionName[10] = "_SuperFrameBuffer";
5367     hucRegionSize[10] = CODECHAL_ENCODE_VP9_BRC_SUPER_FRAME_BUFFER_SIZE;
5368     hucRegionName[11] = "_DataExtension";
5369     hucRegionSize[11] = CODECHAL_ENCODE_VP9_VDENC_DATA_EXTENSION_SIZE;
5370     )
5371 
5372     MOS_COMMAND_BUFFER cmdBuffer;
5373     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5374 
5375     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
5376     {
5377         bool requestFrameTracking = false;
5378         // Send command buffer header at the beginning (OS dependent)
5379         // frame tracking tag is only added in the last command buffer header
5380         requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
5381         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5382         m_firstTaskInPhase = false;
5383     }
5384     // Collect of HuC BRC Update kernel performance data
5385     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5386 
5387     int currPass = GetCurrentPass();
5388     if (m_scalableMode && m_isTilingSupported)
5389     {
5390         // Define huc done semaphore to be empty at the start
5391         for (auto i = 0; i < m_numPipe; i++)
5392         {
5393             SetSemaphoreMem(&m_hucDoneSemaphoreMem[i].sResource, &cmdBuffer, 0);
5394         }
5395         // Wait here for pak int done from previous pass
5396         if (IsLastPass())
5397         {
5398             SendHWWaitCommand(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, currPass);
5399             SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, 0);
5400         }
5401     }
5402 
5403     // load kernel from WOPCM into L2 storage RAM
5404     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5405     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5406     imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencProbKernelDescriptor;
5407     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5408 
5409     // pipe mode select
5410     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5411     pipeModeSelectParams.Mode = m_mode;
5412     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5413 
5414     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCVp9Prob());
5415 
5416     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5417     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5418     dmemParams.presHucDataSource = &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx];
5419     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucProbDmem), CODECHAL_CACHELINE_SIZE);
5420     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5421     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5422 
5423     // Add Virtual addr
5424     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5425     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5426     // Input regions
5427     virtualAddrParams.regionParams[0].presRegion = &m_resProbBuffer[m_vp9PicParams->PicFlags.fields.frame_context_idx];
5428     virtualAddrParams.regionParams[0].isWritable = true;        // Region 0 is both read and write for HuC. Has input probabilities before running HuC and updated probabilities after running HuC, which will then be input to next pass
5429     if (m_scalableMode)
5430     {
5431         virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5432         virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.counterBuffer;
5433     }
5434     else
5435     {
5436         virtualAddrParams.regionParams[1].presRegion = &m_resProbabilityCounterBuffer;
5437         virtualAddrParams.regionParams[1].dwOffset = 0;
5438     }
5439     // If BRC enabled, BRC Pass 2 output SLBB -> input SLBB for HPU on pass 2 (HPU pass 1 and 3. BRC Update pass 1 and 2)
5440     //                 BRC Pass 1 output SLBB -> input SLBB for HPU on pass 1
5441     // If BRC not on, Driver prepared SLBB    -> input to HPU on both passes
5442 
5443     if (m_vdencBrcEnabled)
5444     {
5445         virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
5446     }
5447     else
5448     {
5449         virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
5450     }
5451 
5452     virtualAddrParams.regionParams[8].presRegion = &m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
5453     virtualAddrParams.regionParams[9].presRegion = &m_resHucDefaultProbBuffer;
5454 
5455     // Output regions
5456     virtualAddrParams.regionParams[2].presRegion = &m_resHucProbOutputBuffer;  // Final probability output from HuC after each pass
5457     virtualAddrParams.regionParams[2].isWritable = true;
5458     virtualAddrParams.regionParams[3].presRegion = &m_resProbabilityDeltaBuffer;
5459     virtualAddrParams.regionParams[3].isWritable = true;
5460     virtualAddrParams.regionParams[4].presRegion = &m_resHucPakInsertUncompressedHeaderWriteBuffer;
5461     virtualAddrParams.regionParams[4].isWritable = true;
5462     virtualAddrParams.regionParams[5].presRegion = &m_resCompressedHeaderBuffer;
5463     virtualAddrParams.regionParams[5].isWritable = true;
5464     virtualAddrParams.regionParams[6].presRegion  = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
5465     virtualAddrParams.regionParams[6].isWritable = true;
5466     virtualAddrParams.regionParams[10].presRegion = &m_resBitstreamBuffer;
5467     virtualAddrParams.regionParams[10].isWritable = true;
5468     virtualAddrParams.regionParams[11].presRegion = &m_resVdencDataExtensionBuffer;
5469     virtualAddrParams.regionParams[11].isWritable = true;
5470 
5471     m_hpuVirtualAddrParams = virtualAddrParams;
5472     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
5473     // Store HUC_STATUS2 register bit 6 before HUC_Start command
5474     // This bit will be cleared by HW at the end of a HUC workload
5475     // (HUC_Start command with last start bit set).
5476     CODECHAL_DEBUG_TOOL(
5477         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
5478     )
5479 
5480     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
5481 
5482     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
5483 
5484     // wait Huc completion (use HEVC bit for now)
5485     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5486     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5487     vdPipeFlushParams.Flags.bFlushHEVC = 1;
5488     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5489     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
5490 
5491     // Flush the engine to ensure memory written out
5492     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5493     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5494     flushDwParams.bVideoPipelineCacheInvalidate = true;
5495     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
5496 
5497     // Write HUC_STATUS mask: DW1 (mask value)
5498     MHW_MI_STORE_DATA_PARAMS storeDataParams;
5499     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
5500     storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
5501     storeDataParams.dwResourceOffset = sizeof(uint32_t);
5502     storeDataParams.dwValue = 1 << 31; //Repak bit for HUC is bit 31
5503     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
5504 
5505     // store HUC_STATUS register
5506     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
5507     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
5508     storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
5509     storeRegParams.dwOffset = 0;
5510     storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
5511     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
5512 
5513     auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
5514     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
5515     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
5516 
5517     // In case of other pipes running other tiles, signal the vdenc/pak hw commands there to proceed because huc done
5518     if (m_scalableMode && m_isTilingSupported)
5519     {
5520         for (auto i = 1; i < m_numPipe; i++)
5521         {
5522             if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[i].sResource))
5523             {
5524                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
5525                     SetSemaphoreMem(
5526                         &m_hucDoneSemaphoreMem[i].sResource,
5527                         &cmdBuffer,
5528                         (currPass + 1))
5529                 );
5530             }
5531         }
5532     }
5533 
5534     // For superframe pass, after HuC executes, write the updated size (combined frame size) to status report
5535     // So app knows total size instead of just the showframe size
5536     if (m_superFrameHucPass)
5537     {
5538         EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
5539         uint32_t baseOffset =
5540             (encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
5541             sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
5542 
5543         MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams;
5544         MOS_ZeroMemory(&copyMemMemParams, sizeof(copyMemMemParams));
5545 
5546         copyMemMemParams.presSrc = virtualAddrParams.regionParams[11].presRegion;
5547         copyMemMemParams.dwSrcOffset = 0; // Updated framesize is 1st DW in buffer
5548         copyMemMemParams.presDst = &encodeStatusBuf->resStatusBuffer;
5549         copyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset;
5550 
5551         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(
5552             &cmdBuffer,
5553             &copyMemMemParams));
5554     }
5555     // Ending collect of HuC BRC Update kernel performance data
5556     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5557 
5558     if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
5559     {
5560         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5561     }
5562 
5563     // Dump input probabilites before running HuC
5564     CODECHAL_DEBUG_TOOL(
5565         CodechalHucRegionDumpType dumpType = m_superFrameHucPass ? CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame : CodechalHucRegionDumpType::hucRegionDumpHpu;
5566         m_debugInterface->DumpHucRegion(
5567             virtualAddrParams.regionParams[0].presRegion,
5568             0,
5569             hucRegionSize[0],
5570             0,
5571             "_ProbBuffer",
5572             (virtualAddrParams.regionParams[0].isWritable ? true : false),
5573             currPass,
5574             dumpType);
5575     )
5576 
5577         ReturnCommandBuffer(&cmdBuffer);
5578 
5579     // For Temporal scaling, super frame pass is initiated after the command buffer submission in ExecuteSliceLevel.
5580     // So if Single Task Phase is enabled, then we need to explicitly submit the command buffer here to call HuC
5581     if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
5582     {
5583         bool renderFlags = m_videoContextUsesNullHw;
5584 
5585         CODECHAL_DEBUG_TOOL(
5586             std::string nameCmdPass = (m_superFrameHucPass ? "HPU_SuperFramePass" : "HPU_Pass") + std::to_string(currPass);
5587 
5588             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5589                 &cmdBuffer,
5590                 CODECHAL_NUM_MEDIA_STATES,
5591                 nameCmdPass.c_str()));
5592         )
5593 
5594         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5595         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, m_vp9PicParams->PicFlags.fields.super_frame));
5596         ReturnCommandBuffer(&cmdBuffer);
5597 
5598         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
5599 
5600         CODECHAL_DEBUG_TOOL(
5601             CodechalHucRegionDumpType dumpType = m_superFrameHucPass ? CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame : CodechalHucRegionDumpType::hucRegionDumpHpu;
5602             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5603                 &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
5604                 sizeof(HucProbDmem),
5605                 currPass,
5606                 dumpType));
5607 
5608         for (auto i = 0; i < 16; i++)
5609         {
5610             if (virtualAddrParams.regionParams[i].presRegion)
5611             {
5612                 if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
5613                 {
5614                     continue;
5615                 }
5616                 m_debugInterface->DumpHucRegion(
5617                     virtualAddrParams.regionParams[i].presRegion,
5618                     virtualAddrParams.regionParams[i].dwOffset,
5619                     hucRegionSize[i],
5620                     i,
5621                     hucRegionName[i],
5622                     !virtualAddrParams.regionParams[i].isWritable,
5623                     currPass,
5624                     dumpType);
5625             }
5626         })
5627     }
5628 
5629     return eStatus;
5630 }
5631 
5632 /*----------------------------------------------------------------------------
5633 | Name      : HuCBrcInitReset
5634 | Purpose   : Start/Submit VP9 HuC BrcInit kernel to HW
5635 |
5636 | Returns   : MOS_STATUS
5637 \---------------------------------------------------------------------------*/
HuCBrcInitReset()5638 MOS_STATUS CodechalVdencVp9StateG12::HuCBrcInitReset()
5639 {
5640     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5641 
5642     CODECHAL_ENCODE_FUNCTION_ENTER;
5643 
5644     int currPass = GetCurrentPass();
5645 
5646     CODECHAL_DEBUG_TOOL(
5647         uint32_t hucRegionSize[16];
5648     const char* hucRegionName[16];
5649 
5650     hucRegionName[0] = "_BrcHistoryBuffer";
5651     hucRegionSize[0] = m_brcHistoryBufferSize;
5652     )
5653 
5654         MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5655 #if (_DEBUG || _RELEASE_INTERNAL)
5656     if (m_swBrcMode)
5657     {
5658         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
5659         CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(false));
5660         // Set region params for dumping only
5661         MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5662         virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5663         virtualAddrParams.regionParams[0].isWritable = true;
5664         m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
5665         m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
5666 
5667         CODECHAL_DEBUG_TOOL(
5668             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5669                 &m_resVdencBrcInitDmemBuffer,
5670                 sizeof(HucBrcInitDmem),
5671                 0,
5672                 CodechalHucRegionDumpType::hucRegionDumpInit));
5673 
5674         for (auto i = 0; i < 16; i++)
5675         {
5676             if (virtualAddrParams.regionParams[i].presRegion)
5677             {
5678                 m_debugInterface->DumpHucRegion(
5679                     virtualAddrParams.regionParams[i].presRegion,
5680                     virtualAddrParams.regionParams[i].dwOffset,
5681                     hucRegionSize[i],
5682                     i,
5683                     hucRegionName[i],
5684                     !virtualAddrParams.regionParams[i].isWritable,
5685                     currPass,
5686                     CodechalHucRegionDumpType::hucRegionDumpInit);
5687             }
5688         }
5689         )
5690             return eStatus;
5691     }
5692 #endif
5693     MOS_COMMAND_BUFFER cmdBuffer;
5694     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5695 
5696     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
5697     {
5698         // Send command buffer header at the beginning (OS dependent)
5699         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : false;
5700         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5701 
5702         m_firstTaskInPhase = false;
5703     }
5704     // Collect HuC Init/Reset kernel performance data
5705     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5706 
5707     // load kernel from WOPCM into L2 storage RAM
5708     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5709     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5710     imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcInitKernelDescriptor;
5711     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5712 
5713     // pipe mode select
5714     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5715     pipeModeSelectParams.Mode = m_mode;
5716     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5717 
5718     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
5719 
5720     m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
5721     m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
5722 
5723     // set HuC DMEM param
5724     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5725     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5726     dmemParams.presHucDataSource = &m_resVdencBrcInitDmemBuffer;
5727     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcInitDmem), CODECHAL_CACHELINE_SIZE);
5728     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
5729     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5730 
5731     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5732     virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5733     virtualAddrParams.regionParams[0].isWritable = true;
5734     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
5735 
5736     // Store HUC_STATUS2 register bit 6 before HUC_Start command
5737     // This bit will be cleared by HW at the end of a HUC workload
5738     // (HUC_Start command with last start bit set).
5739     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
5740 
5741     // HuC Status 2 report in Status Report
5742     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
5743 
5744     // Check HuC_STATUS2 bit6, if bit6 > 0 HW continue execution following cmd, otherwise it send a COND BB END cmd.
5745     uint32_t compareOperation = mhw_mi_g12_X::MI_CONDITIONAL_BATCH_BUFFER_END_CMD::COMPARE_OPERATION_MADGREATERTHANIDD;
5746     auto hwInterface = dynamic_cast<CodechalHwInterfaceG12 *>(m_hwInterface);
5747     CODECHAL_ENCODE_CHK_NULL_RETURN(hwInterface);
5748     CODECHAL_ENCODE_CHK_STATUS_RETURN(hwInterface->SendCondBbEndCmd(
5749         &m_resHucStatus2Buffer, 0, 0, false, false, compareOperation, &cmdBuffer));
5750 
5751     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
5752 
5753     // wait Huc completion (use HEVC bit for now)
5754     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
5755     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
5756     vdPipeFlushParams.Flags.bFlushHEVC = 1;
5757     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
5758     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
5759 
5760     // Flush the engine to ensure memory written out
5761     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
5762     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
5763     flushDwParams.bVideoPipelineCacheInvalidate = true;
5764     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
5765 
5766     auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
5767     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
5768     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
5769     // End: Collect HuC Init/Reset kernel performance data
5770     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5771 
5772     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
5773     {
5774         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5775     }
5776 
5777     ReturnCommandBuffer(&cmdBuffer);
5778 
5779     if (!m_singleTaskPhaseSupported)
5780     {
5781         bool renderingFlags = m_videoContextUsesNullHw;
5782 
5783         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
5784 
5785         CODECHAL_DEBUG_TOOL(
5786             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5787                 &m_resVdencBrcInitDmemBuffer,
5788                 sizeof(HucBrcInitDmem),
5789                 0,
5790                 CodechalHucRegionDumpType::hucRegionDumpInit));
5791 
5792         for (auto i = 0; i < 16; i++)
5793         {
5794             if (virtualAddrParams.regionParams[i].presRegion)
5795             {
5796                 m_debugInterface->DumpHucRegion(
5797                     virtualAddrParams.regionParams[i].presRegion,
5798                     virtualAddrParams.regionParams[i].dwOffset,
5799                     hucRegionSize[i],
5800                     i,
5801                     hucRegionName[i],
5802                     !virtualAddrParams.regionParams[i].isWritable,
5803                     0,
5804                     CodechalHucRegionDumpType::hucRegionDumpInit);
5805             }
5806         }
5807         )
5808     }
5809 
5810     return eStatus;
5811 }
5812 
5813 /*----------------------------------------------------------------------------
5814 | Name      : HuCBrcUpdate
5815 | Purpose   : Start/Submit VP9 HuC BrcUpdate kernel to HW
5816 |
5817 | Returns   : MOS_STATUS
5818 \---------------------------------------------------------------------------*/
HuCBrcUpdate()5819 MOS_STATUS CodechalVdencVp9StateG12::HuCBrcUpdate()
5820 {
5821     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5822 
5823     CODECHAL_ENCODE_FUNCTION_ENTER;
5824 
5825     int currPass = GetCurrentPass();
5826 
5827     CODECHAL_DEBUG_TOOL(
5828         uint32_t hucRegionSize[16];
5829     const char* hucRegionName[16];
5830 
5831     hucRegionName[0] = "_BrcHistory";
5832     hucRegionSize[0] = m_brcHistoryBufferSize;
5833     hucRegionName[1] = "_VDEncStats";
5834     hucRegionSize[1] = m_vdencBrcStatsBufferSize;
5835     hucRegionName[2] = "_PAKStats";
5836     hucRegionSize[2] = m_vdencBrcPakStatsBufferSize;
5837     hucRegionName[3] = "_InputSLBB";
5838     hucRegionSize[3] = m_vdencPicStateSecondLevelBatchBufferSize;
5839     hucRegionName[4] = "_BRCData";
5840     hucRegionSize[4] = CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE;
5841     hucRegionName[5] = "_ConstData";
5842     hucRegionSize[5] = m_brcConstantSurfaceSize;
5843     hucRegionName[6] = "_OutputSLBB";
5844     hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
5845     hucRegionName[7] = "_PAKMMIO";
5846     hucRegionSize[7] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
5847     )
5848 
5849         MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
5850 #if (_DEBUG || _RELEASE_INTERNAL)
5851     if (m_swBrcMode)
5852     {
5853         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
5854         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
5855         // Set region params for dumping only
5856         MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5857         virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5858         virtualAddrParams.regionParams[0].isWritable = true;
5859         virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
5860         virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
5861         virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
5862         virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
5863         virtualAddrParams.regionParams[4].isWritable = true;
5864         virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
5865         virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
5866         virtualAddrParams.regionParams[6].isWritable = true;
5867         virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
5868         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(// Dump history IN since it's both IN/OUT, OUT will dump at end of function, rest of buffers are IN XOR OUT (not both)
5869             virtualAddrParams.regionParams[0].presRegion,
5870             virtualAddrParams.regionParams[0].dwOffset,
5871             hucRegionSize[0],
5872             0,
5873             hucRegionName[0],
5874             true,
5875             currPass,
5876             CodechalHucRegionDumpType::hucRegionDumpUpdate));
5877         CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(true));
5878 
5879         CODECHAL_DEBUG_TOOL(
5880             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
5881                 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
5882                 sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
5883                 currPass,
5884                 CodechalHucRegionDumpType::hucRegionDumpUpdate));
5885 
5886         for (auto i = 0; i < 16; i++)
5887         {
5888             if (virtualAddrParams.regionParams[i].presRegion)
5889             {
5890                 m_debugInterface->DumpHucRegion(
5891                     virtualAddrParams.regionParams[i].presRegion,
5892                     virtualAddrParams.regionParams[i].dwOffset,
5893                     hucRegionSize[i],
5894                     i,
5895                     hucRegionName[i],
5896                     !virtualAddrParams.regionParams[i].isWritable,
5897                     currPass,
5898                     CodechalHucRegionDumpType::hucRegionDumpUpdate);
5899             }
5900         }
5901         );
5902         // We increment by the average frame value once for each frame
5903         if (IsFirstPass())
5904         {
5905             m_curTargetFullness += m_inputBitsPerFrame;
5906         }
5907 
5908         return eStatus;
5909     }
5910 #endif
5911 
5912     MOS_COMMAND_BUFFER cmdBuffer;
5913     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
5914 
5915     if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && !m_scalableMode)
5916     {
5917         // Send command buffer header at the beginning (OS dependent)
5918         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : false;
5919         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
5920 
5921         m_firstTaskInPhase = false;
5922     }
5923     // Collect HuC BRC Update kernel performance data
5924     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
5925 
5926     // For Scalability, wait here for previous pass PAK int done
5927     if (m_scalableMode && !IsFirstPass() && m_isTilingSupported && !m_brcInit && !m_brcReset)
5928     {
5929         SendHWWaitCommand(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, currPass);
5930         SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, 0);
5931     }
5932 
5933     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType));
5934 
5935     // load kernel from WOPCM into L2 storage RAM
5936     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
5937     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
5938     imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcUpdateKernelDescriptor;
5939     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
5940 
5941     // pipe mode select
5942     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
5943     pipeModeSelectParams.Mode = m_mode;
5944     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
5945 
5946     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
5947 
5948     // set HuC DMEM param
5949     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
5950     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
5951     dmemParams.presHucDataSource = &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx];
5952     dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcUpdateDmem), CODECHAL_CACHELINE_SIZE);
5953     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; // how to set?
5954     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
5955 
5956     // Set surfaces to HuC regions
5957     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
5958 
5959     // History Buffer - IN/OUT
5960     virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
5961     virtualAddrParams.regionParams[0].isWritable = true;
5962 
5963     if (IsFirstPass()) //First BRC pass needs stats from last frame
5964     {
5965         if (m_lastFrameScalableMode) // Frame (n-1) Scalable mode output -> input for frame n, BRC pass 0
5966         {
5967             //VDEnc Stats Buffer - IN
5968             virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5969             virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.vdencStats;
5970             // Frame (not PAK) Stats Buffer - IN
5971             virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5972             virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
5973             // PAK MMIO - IN
5974             virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
5975         }
5976         else
5977         {
5978             virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
5979             virtualAddrParams.regionParams[1].dwOffset = 0;
5980             virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
5981             virtualAddrParams.regionParams[2].dwOffset = 0;
5982             virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
5983         }
5984     }
5985     else // Second BRC Update Pass
5986     {
5987         if (m_scalableMode)
5988         {
5989             // VDEnc Stats Buffer - IN
5990             virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5991             virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.vdencStats;
5992             // Frame (not PAK) Stats Buffer - IN
5993             virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
5994             virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
5995             // PAK MMIO - IN
5996             virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
5997         }
5998         else
5999         {
6000             virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
6001             virtualAddrParams.regionParams[1].dwOffset = 0;
6002             virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
6003             virtualAddrParams.regionParams[2].dwOffset = 0;
6004             virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
6005         }
6006     }
6007 
6008     // Input SLBB (second level batch buffer) - IN
6009     virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
6010 
6011     // BRC Data - OUT
6012     virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
6013     virtualAddrParams.regionParams[4].isWritable = true;
6014 
6015     // Const Data - IN
6016     virtualAddrParams.regionParams[5].presRegion = GetBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer[0], m_pictureCodingType);
6017 
6018     // Output SLBB - OUT
6019     virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
6020     virtualAddrParams.regionParams[6].isWritable = true;
6021 
6022     // Load HuC Regions into Cmd Buf
6023     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
6024 
6025     // Store HUC_STATUS2 register bit 6 before HUC_Start command
6026     // This bit will be cleared by HW at the end of a HUC workload
6027     // (HUC_Start command with last start bit set).
6028     CODECHAL_DEBUG_TOOL(
6029         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
6030     )
6031 
6032     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
6033 
6034     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
6035 
6036     // wait Huc completion (use HEVC bit for now)
6037     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
6038     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
6039     vdPipeFlushParams.Flags.bFlushHEVC = 1;
6040     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
6041     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
6042 
6043     // Flush the engine to ensure memory written out
6044     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
6045     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
6046     flushDwParams.bVideoPipelineCacheInvalidate = true;
6047     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
6048 
6049     MHW_MI_STORE_DATA_PARAMS storeDataParams;
6050     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6051     storeDataParams.pOsResource      = &m_resHucPakMmioBuffer;
6052     storeDataParams.dwResourceOffset = sizeof(uint32_t);
6053     storeDataParams.dwValue          = 1 << 31;
6054     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
6055 
6056     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
6057     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
6058     storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
6059     storeRegParams.dwOffset        = 0;
6060     storeRegParams.dwRegister      = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
6061     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
6062 
6063     auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
6064     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
6065     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
6066 
6067     // Ending collect of HuC BRC Update kernel performance data
6068     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
6069 
6070     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
6071     {
6072         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
6073     }
6074 
6075     ReturnCommandBuffer(&cmdBuffer);
6076 
6077     if (!m_singleTaskPhaseSupported)
6078     {
6079         bool renderingFlags = m_videoContextUsesNullHw;
6080 
6081         // Dump history input before HuC runs
6082         CODECHAL_DEBUG_TOOL(
6083             m_debugInterface->DumpHucRegion(
6084                 virtualAddrParams.regionParams[0].presRegion,
6085                 0,
6086                 hucRegionSize[0],
6087                 0,
6088                 hucRegionName[0],
6089                 true,
6090                 currPass,
6091                 CodechalHucRegionDumpType::hucRegionDumpUpdate);
6092         );
6093 
6094         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
6095 
6096         CODECHAL_DEBUG_TOOL(
6097             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
6098                 &m_resVdencBrcUpdateDmemBuffer[currPass][m_currRecycledBufIdx],
6099                 sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
6100                 currPass,
6101                 CodechalHucRegionDumpType::hucRegionDumpUpdate));
6102 
6103         for (auto i = 0; i < 16; i++)
6104         {
6105             if (virtualAddrParams.regionParams[i].presRegion)
6106             {
6107                 m_debugInterface->DumpHucRegion(
6108                     virtualAddrParams.regionParams[i].presRegion,
6109                     virtualAddrParams.regionParams[i].dwOffset,
6110                     hucRegionSize[i],
6111                     i,
6112                     hucRegionName[i],
6113                     !virtualAddrParams.regionParams[i].isWritable,
6114                     currPass,
6115                     CodechalHucRegionDumpType::hucRegionDumpUpdate);
6116             }
6117         }
6118         )
6119     }
6120 
6121     // We increment by the average frame value once for each frame
6122     if (IsFirstPass())
6123     {
6124         m_curTargetFullness += m_inputBitsPerFrame;
6125     }
6126 
6127     return eStatus;
6128 }
6129 
InitMmcState()6130 MOS_STATUS CodechalVdencVp9StateG12::InitMmcState()
6131 {
6132     CODECHAL_ENCODE_FUNCTION_ENTER;
6133 #ifdef _MMC_SUPPORTED
6134     m_mmcState = MOS_New(CodechalMmcEncodeVp9G12, m_hwInterface, &m_reconSurface, &m_rawSurface);
6135     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
6136 #endif
6137     return MOS_STATUS_SUCCESS;
6138 }
6139 
AddCommandsVp9(uint32_t commandType,PMOS_COMMAND_BUFFER cmdBuffer)6140 MOS_STATUS CodechalVdencVp9StateG12::AddCommandsVp9(uint32_t commandType, PMOS_COMMAND_BUFFER cmdBuffer )
6141 {
6142     auto qp = m_vp9PicParams->LumaACQIndex;
6143     auto vp9FrameType = m_vp9PicParams->PicFlags.fields.frame_type;
6144     double QPScale = (vp9FrameType == CODEC_VP9_KEY_FRAME) ? 0.31 : 0.33;
6145     double lambda = QPScale * CODECHAL_VP9_QUANT_AC[qp] / 8;
6146 
6147     auto sadQpLambda = lambda * 4 + 0.5;
6148     auto rdQpLambda = lambda * lambda *4 + 0.5;
6149 
6150     if (commandType == CODECHAL_CMD1)
6151     {
6152         MHW_VDBOX_VDENC_CMD1_PARAMS cmd1Params;
6153         MOS_ZeroMemory(&cmd1Params, sizeof(cmd1Params));
6154         cmd1Params.Mode = CODECHAL_ENCODE_MODE_VP9;
6155         cmd1Params.usSADQPLambda = (uint16_t)sadQpLambda;
6156         cmd1Params.usRDQPLambda = (uint16_t)rdQpLambda;
6157         cmd1Params.pVp9EncPicParams = m_vp9PicParams;
6158         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd1Cmd(cmdBuffer, nullptr, &cmd1Params));
6159 
6160     }
6161     else if (commandType == CODECHAL_CMD2)
6162     {
6163         PMHW_VDBOX_VDENC_CMD2_STATE cmd2Params(new MHW_VDBOX_VDENC_CMD2_STATE);
6164         CODECHAL_ENCODE_CHK_NULL_RETURN(cmd2Params);
6165 
6166         MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
6167         MOS_ZeroMemory(&segmentState, sizeof(segmentState));
6168         segmentState.Mode = m_mode;
6169         segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
6170         cmd2Params->Mode = m_mode;
6171         cmd2Params->pVp9EncPicParams = m_vp9PicParams;
6172         cmd2Params->pVp9EncSeqParams = m_vp9SeqParams;
6173         cmd2Params->bSegmentationEnabled = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
6174         cmd2Params->pVp9SegmentState = &segmentState;
6175         cmd2Params->bPrevFrameSegEnabled = m_prevFrameSegEnabled;
6176         cmd2Params->bStreamInEnabled = m_segmentMapProvided || m_16xMeEnabled;
6177         cmd2Params->ucNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
6178         cmd2Params->usSADQPLambda = (uint16_t)sadQpLambda;
6179         cmd2Params->usRDQPLambda = (uint16_t)rdQpLambda;
6180         cmd2Params->bPakOnlyMultipassEnable = m_vdencPakonlyMultipassEnabled;
6181         cmd2Params->bDynamicScalingEnabled = ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled);
6182         cmd2Params->temporalMVpEnable = m_vp9PicParams->PicFlags.fields.frame_type && !m_prevFrameInfo.KeyFrame;
6183         if ((m_vp9PicParams->RefFlags.fields.LastRefIdx == m_vp9PicParams->RefFlags.fields.AltRefIdx
6184             && m_vp9PicParams->RefFlags.fields.AltRefIdx == m_vp9PicParams->RefFlags.fields.GoldenRefIdx
6185             && m_vp9PicParams->RefFlags.fields.GoldenRefIdx == m_vp9PicParams->RefFlags.fields.LastRefIdx)
6186             || (m_vp9SeqParams->TargetUsage == TU_QUALITY)) {
6187             cmd2Params->temporalMVpEnable = 0;
6188         }
6189         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd2Cmd(cmdBuffer, nullptr, cmd2Params));
6190     }
6191 
6192     return MOS_STATUS_SUCCESS;
6193 }
6194 
ConfigStitchDataBuffer()6195 MOS_STATUS CodechalVdencVp9StateG12::ConfigStitchDataBuffer()
6196 {
6197     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6198     CODECHAL_ENCODE_FUNCTION_ENTER;
6199     int32_t currentPass = GetCurrentPass();
6200 
6201     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6202     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6203     lockFlagsWriteOnly.WriteOnly = 1;
6204 
6205     HucCommandData *hucStitchDataBuf = (HucCommandData *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6206     CODECHAL_ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
6207 
6208     MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
6209     hucStitchDataBuf->TotalCommands          = 1;
6210     hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
6211 
6212     HucInputCmdG12 hucInputCmd;
6213     MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
6214 
6215     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
6216     hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
6217     hucInputCmd.CmdMode             = HUC_CMD_LIST_MODE;
6218     hucInputCmd.LengthOfTable       = (uint8_t)GetNumTilesInFrame();
6219     hucInputCmd.CopySize            = m_hwInterface->m_tileRecordSize;
6220 
6221     PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource;
6222 
6223     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6224         m_osInterface,
6225         presSrc,
6226         false,
6227         false));
6228     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6229         m_osInterface,
6230         &m_resBitstreamBuffer,
6231         true,
6232         true));
6233 
6234     uint64_t srcAddr          = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
6235     uint64_t destAddr         = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
6236     hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
6237     hucInputCmd.SrcAddrTop    = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
6238 
6239     hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
6240     hucInputCmd.DestAddrTop    = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
6241 
6242     MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
6243 
6244     m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
6245 
6246     return eStatus;
6247 }
6248 
SetDmemHuCVp9Prob()6249 MOS_STATUS CodechalVdencVp9StateG12::SetDmemHuCVp9Prob()
6250 {
6251     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6252 
6253     CODECHAL_ENCODE_FUNCTION_ENTER;
6254 
6255     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6256     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6257     lockFlagsWriteOnly.WriteOnly = 1;
6258 
6259     HucProbDmem *dmem     = nullptr;
6260     HucProbDmem *dmemTemp = nullptr;
6261     int          currPass = GetCurrentPass();
6262     if (IsFirstPass())
6263     {
6264         for (auto i = 0; i < 3; i++)
6265         {
6266             dmem = (HucProbDmem *)m_osInterface->pfnLockResource(
6267                 m_osInterface, &m_resHucProbDmemBuffer[i][m_currRecycledBufIdx], &lockFlagsWriteOnly);
6268             CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
6269 
6270             if (i == 0)
6271             {
6272                 dmemTemp = dmem;
6273             }
6274 
6275             MOS_SecureMemcpy(dmem, sizeof(HucProbDmem), m_probDmem, sizeof(HucProbDmem));
6276 
6277             if (i != 0)
6278             {
6279                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucProbDmemBuffer[i][m_currRecycledBufIdx]));
6280                 dmem = dmemTemp;
6281             }
6282         }
6283     }
6284     else
6285     {
6286         dmem = (HucProbDmem *)m_osInterface->pfnLockResource(
6287             m_osInterface, &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx], &lockFlagsWriteOnly);
6288         CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
6289     }
6290 
6291     // for BRC cases, HuC needs to be called on Pass 1
6292     if (m_superFrameHucPass)
6293     {
6294         dmem->HuCPassNum = CODECHAL_ENCODE_VP9_HUC_SUPERFRAME_PASS;
6295     }
6296     else
6297     {
6298         if (m_dysBrc)
6299         {
6300             //For BRC+Dynamic Scaling, we need to run as HUC pass 1 in the last pass since the curr_pass was changed to 0.
6301             dmem->HuCPassNum = currPass != 0;
6302         }
6303         else
6304         {
6305             //For Non-dynamic scaling BRC cases, HuC needs to run as HuC pass one only in last pass.
6306             dmem->HuCPassNum = ((m_vdencBrcEnabled && currPass == 1) ? 0 : (currPass != 0));
6307         }
6308     }
6309 
6310     dmem->FrameWidth  = m_oriFrameWidth;
6311     dmem->FrameHeight = m_oriFrameHeight;
6312 
6313     for (auto i = 0; i < CODEC_VP9_MAX_SEGMENTS; i++)
6314     {
6315         dmem->SegmentRef[i]  = (m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentReferenceEnabled == true) ? m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentReference : CODECHAL_ENCODE_VP9_REF_SEGMENT_DISABLED;
6316         dmem->SegmentSkip[i] = m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentSkipped;
6317     }
6318 
6319     if (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME && m_currPass == 0)
6320     {
6321         for (auto i = 1; i < CODEC_VP9_NUM_CONTEXTS; i++)
6322         {
6323             uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
6324                 m_osInterface,
6325                 &m_resProbBuffer[i],
6326                 &lockFlagsWriteOnly);
6327 
6328             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6329 
6330             ContextBufferInit(data, 0);
6331             CtxBufDiffInit(data, 0);
6332 
6333             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
6334                 m_osInterface,
6335                 &m_resProbBuffer[i]));
6336         }
6337     }
6338 
6339     // in multipasses, only delta seg qp (SegCodeAbs = 0) is supported, confirmed by the arch team
6340     dmem->SegCodeAbs        = 0;
6341     dmem->SegTemporalUpdate = m_vp9PicParams->PicFlags.fields.segmentation_temporal_update;
6342     dmem->LastRefIndex      = m_vp9PicParams->RefFlags.fields.LastRefIdx;
6343     dmem->GoldenRefIndex    = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
6344     dmem->AltRefIndex       = m_vp9PicParams->RefFlags.fields.AltRefIdx;
6345     dmem->RefreshFrameFlags = m_vp9PicParams->RefFlags.fields.refresh_frame_flags;
6346     dmem->RefFrameFlags     = m_refFrameFlags;
6347     dmem->ContextFrameTypes = m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx];
6348     dmem->FrameToShow       = GetReferenceBufferSlotIndex(dmem->RefreshFrameFlags);
6349 
6350     dmem->FrameCtrl.FrameType            = m_vp9PicParams->PicFlags.fields.frame_type;
6351     dmem->FrameCtrl.ShowFrame            = m_vp9PicParams->PicFlags.fields.show_frame;
6352     dmem->FrameCtrl.ErrorResilientMode   = m_vp9PicParams->PicFlags.fields.error_resilient_mode;
6353     dmem->FrameCtrl.IntraOnly            = m_vp9PicParams->PicFlags.fields.intra_only;
6354     dmem->FrameCtrl.ContextReset         = m_vp9PicParams->PicFlags.fields.reset_frame_context;
6355     dmem->FrameCtrl.LastRefFrameBias     = m_vp9PicParams->RefFlags.fields.LastRefSignBias;
6356     dmem->FrameCtrl.GoldenRefFrameBias   = m_vp9PicParams->RefFlags.fields.GoldenRefSignBias;
6357     dmem->FrameCtrl.AltRefFrameBias      = m_vp9PicParams->RefFlags.fields.AltRefSignBias;
6358     dmem->FrameCtrl.AllowHighPrecisionMv = m_vp9PicParams->PicFlags.fields.allow_high_precision_mv;
6359     dmem->FrameCtrl.McompFilterMode      = m_vp9PicParams->PicFlags.fields.mcomp_filter_type;
6360     dmem->FrameCtrl.TxMode               = m_txMode;
6361     dmem->FrameCtrl.RefreshFrameContext  = m_vp9PicParams->PicFlags.fields.refresh_frame_context;
6362     dmem->FrameCtrl.FrameParallelDecode  = m_vp9PicParams->PicFlags.fields.frame_parallel_decoding_mode;
6363     dmem->FrameCtrl.CompPredMode         = m_vp9PicParams->PicFlags.fields.comp_prediction_mode;
6364     dmem->FrameCtrl.FrameContextIdx      = m_vp9PicParams->PicFlags.fields.frame_context_idx;
6365     dmem->FrameCtrl.SharpnessLevel       = m_vp9PicParams->sharpness_level;
6366     dmem->FrameCtrl.SegOn                = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
6367     dmem->FrameCtrl.SegMapUpdate         = m_vp9PicParams->PicFlags.fields.segmentation_update_map;
6368     dmem->FrameCtrl.SegUpdateData        = m_vp9PicParams->PicFlags.fields.seg_update_data;
6369     dmem->StreamInSegEnable              = (uint8_t)m_segmentMapProvided;
6370     dmem->StreamInEnable                 = (uint8_t)m_segmentMapProvided;  // Currently unused, if used may || with HME enabled
6371 
6372     dmem->FrameCtrl.log2TileRows = m_vp9PicParams->log2_tile_rows;
6373     dmem->FrameCtrl.log2TileCols = m_vp9PicParams->log2_tile_columns;
6374 
6375     dmem->PrevFrameInfo = m_prevFrameInfo;
6376 
6377     // For DyS CQP or BRC case, there is no Repak on last pass. So Repak flag is disabled here.
6378     // We also disable repak pass in TU7 speed mode usage for performance reasons.
6379     dmem->RePak = (m_numPasses > 0 && IsLastPass() && !(m_dysCqp || m_dysBrc) && (m_vp9SeqParams->TargetUsage != TU_PERFORMANCE));
6380 
6381     if (dmem->RePak && m_adaptiveRepakSupported)
6382     {
6383         MOS_SecureMemcpy(dmem->RePakThreshold, sizeof(uint32_t) * CODEC_VP9_QINDEX_RANGE, m_rePakThreshold, sizeof(uint32_t) * CODEC_VP9_QINDEX_RANGE);
6384     }
6385 
6386     dmem->LFLevelBitOffset           = m_vp9PicParams->BitOffsetForLFLevel;
6387     dmem->QIndexBitOffset            = m_vp9PicParams->BitOffsetForQIndex;
6388     dmem->SegBitOffset               = m_vp9PicParams->BitOffsetForSegmentation + 1;  // exclude segment_enable bit
6389     dmem->SegLengthInBits            = m_vp9PicParams->BitSizeForSegmentation - 1;    // exclude segment_enable bit
6390     dmem->UnCompHdrTotalLengthInBits = m_vp9PicParams->BitOffsetForFirstPartitionSize + 16;
6391     dmem->PicStateOffset             = m_hucPicStateOffset;
6392     dmem->SLBBSize                   = m_hucSlbbSize;
6393     dmem->IVFHeaderSize              = (m_frameNum == 0) ? 44 : 12;
6394     dmem->VDEncImgStateOffset        = m_slbbImgStateOffset;
6395     dmem->PakOnlyEnable              = ((dmem->RePak) && m_vdencPakonlyMultipassEnabled) ? 1 : 0;
6396 
6397     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx]));
6398 
6399     return eStatus;
6400 }
6401 
InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)6402 MOS_STATUS CodechalVdencVp9StateG12::InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)
6403 {
6404     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6405     CODECHAL_ENCODE_FUNCTION_ENTER;
6406 
6407     MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS miEnhancedConditionalBatchBufferEndParams;
6408 
6409     MOS_ZeroMemory(
6410         &miEnhancedConditionalBatchBufferEndParams,
6411         sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
6412 
6413     miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_resHucErrorStatusBuffer;
6414 
6415     miEnhancedConditionalBatchBufferEndParams.dwParamsType                   = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
6416     miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = false;
6417     miEnhancedConditionalBatchBufferEndParams.compareOperation               = MAD_EQUAL_IDD;
6418     miEnhancedConditionalBatchBufferEndParams.bDisableCompareMask            = false;
6419 
6420     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
6421         cmdBuffer,
6422         (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)));
6423 
6424     return eStatus;
6425 }
6426