1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_hevc_g12.cpp
24 //! \brief    HEVC dual-pipe encoder for GEN12.
25 //!
26 
27 #include "codechal_encode_hevc_g12.h"
28 #include "codechal_encode_csc_ds_g12.h"
29 #include "codechal_mmc_encode_hevc_g12.h"
30 #include "codechal_encode_wp_g12.h"
31 #include "codechal_kernel_header_g12.h"
32 #include "codechal_kernel_hme_g12.h"
33 #include "codechal_debug.h"
34 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
35 #include "igcodeckrn_g12.h"
36 #endif
37 #include "codeckrnheader.h"
38 #include "mhw_vdbox_hcp_g12_X.h"
39 #include "mhw_vdbox_g12_X.h"
40 #include "mhw_mi_g12_X.h"
41 #include "mhw_render_g12_X.h"
42 #include "cm_queue_rt.h"
43 #include "codechal_debug.h"
44 
45 //! \cond SKIP_DOXYGEN
// CRECOST: multiplies lambda by the mode-bit entry m_modeBits[lcu][mode][slice] and the
// scale m_modeBitsScale[mode][slice], truncates the product to uint32_t and passes it to
// Map44LutValue() together with the constant 0x8F.
#define CRECOST(lambda, mode, lcu, slice)                                \
    (Map44LutValue(                                                      \
        (uint32_t)((lambda) * (m_modeBits[(lcu)][(mode)][(slice)]) *     \
                   (m_modeBitsScale[(mode)][(slice)])),                  \
        0x8F))
// RDEBITS62: passes the product of m_modeBits[lcu][mode][slice] and
// m_modeBitsScale[mode][slice], converted to float, to GetU62ModeBits().
#define RDEBITS62(mode, lcu, slice)                                      \
    (GetU62ModeBits(                                                     \
        (float)((m_modeBits[(lcu)][(mode)][(slice)]) *                   \
                (m_modeBitsScale[(mode)][(slice)]))))
48 //! \endcond
49 
SetGpuCtxCreatOption()50 MOS_STATUS CodechalEncHevcStateG12::SetGpuCtxCreatOption()
51 {
52     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
53 
54     CODECHAL_ENCODE_FUNCTION_ENTER;
55 
56     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
57     {
58         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
59     }
60     else
61     {
62         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
63         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
64     }
65 
66     return eStatus;
67 }
68 
AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER * cmdBuffer)69 MOS_STATUS CodechalEncHevcStateG12::AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER *cmdBuffer)
70 {
71     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
72 
73     // call MI_VD_CONTROL_STATE before HCP_PIPE_SELECT to init the pipe.
74     {
75         MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams;
76         //set up VD_CONTROL_STATE command
77         {
78             MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
79             vdControlStateParams.initialization = true;
80             CODECHAL_ENCODE_CHK_STATUS_RETURN(
81                 static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(cmdBuffer, &vdControlStateParams));
82         }
83     }
84 
85     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams;
86     SetHcpPipeModeSelectParams(pipeModeSelectParams);
87     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
88 
89     return eStatus;
90 }
91 
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)92 void CodechalEncHevcStateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS &vdboxPipeModeSelectParams)
93 {
94     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 &pipeModeSelectParams =
95         static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 &>(vdboxPipeModeSelectParams);
96     pipeModeSelectParams = {};
97     CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
98 
99     pipeModeSelectParams.pakPiplnStrmoutEnabled = m_pakPiplStrmOutEnable;
100     pipeModeSelectParams.pakFrmLvlStrmoutEnable = (m_brcEnabled && m_numPipe > 1);
101 
102     if (m_numPipe > 1)
103     {
104         // Running in the multiple VDBOX mode
105         if (IsFirstPipe())
106         {
107             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
108         }
109         else if (IsLastPipe())
110         {
111             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
112         }
113         else
114         {
115             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
116         }
117         pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
118     }
119     else
120     {
121         pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
122         pipeModeSelectParams.PipeWorkMode    = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
123     }
124 }
125 
SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE & picStateParams)126 void CodechalEncHevcStateG12::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE &picStateParams)
127 {
128     CODECHAL_ENCODE_FUNCTION_ENTER;
129 
130     CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams);
131     picStateParams.sseEnabledInVmeEncode = m_sseEnabled;
132 }
133 
AddHcpSurfaceStateCmds(MOS_COMMAND_BUFFER * cmdBuffer)134 MOS_STATUS CodechalEncHevcStateG12::AddHcpSurfaceStateCmds(MOS_COMMAND_BUFFER *cmdBuffer)
135 {
136     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
137 
138     MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
139     SetHcpSrcSurfaceParams(srcSurfaceParams);
140     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &srcSurfaceParams));
141 
142     MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
143     SetHcpReconSurfaceParams(reconSurfaceParams);
144     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &reconSurfaceParams));
145 
146     // Add the surface state for reference picture, GEN12 HW change
147     MHW_VDBOX_SURFACE_PARAMS refSurfaceParams;
148     SetHcpRefSurfaceParams(refSurfaceParams);
149 
150     if (m_mmcState->IsMmcEnabled())
151     {
152         refSurfaceParams.refsMmcEnable       = 0;
153         refSurfaceParams.refsMmcType         = 0;
154         refSurfaceParams.dwCompressionFormat = 0;
155 
156         //add for B frame support
157         if (m_pictureCodingType != I_TYPE)
158         {
159             for (uint8_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
160             {
161                 if (i < CODEC_MAX_NUM_REF_FRAME_HEVC &&
162                     m_picIdx[i].bValid && m_currUsedRefPic[i])
163                 {
164                     uint8_t idx          = m_picIdx[i].ucPicIdx;
165                     uint8_t frameStoreId = m_refIdxMapping[i];
166 
167                     MOS_MEMCOMP_STATE mmcState = MOS_MEMCOMP_DISABLED;
168                     ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &mmcState));
169                     refSurfaceParams.refsMmcEnable |= (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC) ? (1 << frameStoreId) : 0;
170                     refSurfaceParams.refsMmcType |= (mmcState == MOS_MEMCOMP_RC) ? (1 << frameStoreId) : 0;
171                     if (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC)
172                     {
173                         ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcFormat(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &refSurfaceParams.dwCompressionFormat));
174                     }
175                 }
176             }
177         }
178     }
179 
180     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &refSurfaceParams));
181 
182     return eStatus;
183 }
184 
AddHcpPictureStateCmd(MOS_COMMAND_BUFFER * cmdBuffer)185 MOS_STATUS CodechalEncHevcStateG12::AddHcpPictureStateCmd(MOS_COMMAND_BUFFER *cmdBuffer)
186 {
187     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
188 
189     MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
190 
191     SetHcpPicStateParams(picStateParams);
192 
193     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(cmdBuffer, &picStateParams));
194 
195     return eStatus;
196 }
197 
UpdateYUY2SurfaceInfo(MOS_SURFACE & surface,bool is10Bit)198 MOS_STATUS CodechalEncHevcStateG12::UpdateYUY2SurfaceInfo(
199     MOS_SURFACE &surface,
200     bool         is10Bit)
201 {
202     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
203 
204     CODECHAL_ENCODE_FUNCTION_ENTER;
205 
206     if (surface.Format == Format_YUY2V)
207     {
208         // surface has been updated
209         return eStatus;
210     }
211 
212     if (surface.Format != Format_YUY2 &&
213         surface.Format != Format_Y210 &&
214         surface.Format != Format_Y216)
215     {
216         eStatus = MOS_STATUS_INVALID_PARAMETER;
217         return eStatus;
218     }
219 
220     if (surface.dwWidth < m_oriFrameWidth / 2 || surface.dwHeight < m_oriFrameHeight * 2)
221     {
222         eStatus = MOS_STATUS_INVALID_PARAMETER;
223         return eStatus;
224     }
225 
226     surface.Format   = is10Bit ? Format_Y216V : Format_YUY2V;
227     surface.dwWidth  = m_oriFrameWidth;
228     surface.dwHeight = m_oriFrameHeight;
229 
230     surface.YPlaneOffset.iSurfaceOffset = 0;
231     surface.YPlaneOffset.iXOffset       = 0;
232     surface.YPlaneOffset.iYOffset       = 0;
233 
234     surface.UPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
235     surface.UPlaneOffset.iXOffset       = 0;
236     surface.UPlaneOffset.iYOffset       = surface.dwHeight;
237 
238     surface.VPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
239     surface.VPlaneOffset.iXOffset       = 0;
240     surface.VPlaneOffset.iYOffset       = surface.dwHeight;
241 
242     return eStatus;
243 }
244 
InitializePicture(const EncoderParams & params)245 MOS_STATUS CodechalEncHevcStateG12::InitializePicture(const EncoderParams &params)
246 {
247     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
248 
249     CODECHAL_ENCODE_FUNCTION_ENTER;
250 
251     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::InitializePicture(params));
252 
253     if (m_resolutionChanged)
254     {
255         ResizeBufferOffset();
256     }
257 
258     m_sseEnabled = false;
259     // only 420 format support SSE output
260     // see TDR in scalability case, disable SSE for now before HW confirm the capability.
261     if (m_sseSupported &&
262         m_hevcSeqParams->chroma_format_idc == HCP_CHROMA_FORMAT_YUV420 &&
263         m_numPipe == 1)
264     {
265         m_sseEnabled = true;
266     }
267 
268     // for HEVC VME, HUC based WP is not supported.
269     m_hevcPicParams->bEnableGPUWeightedPrediction = false;
270 
271     m_pakPiplStrmOutEnable = m_sseEnabled || (m_brcEnabled && m_numPipe > 1);
272 
273     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams, params.dwBitstreamSize));
274     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
275     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResourcesVariableSize());
276 
277     return eStatus;
278 }
279 
SetPictureStructs()280 MOS_STATUS CodechalEncHevcStateG12::SetPictureStructs()
281 {
282     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
283 
284     CODECHAL_ENCODE_FUNCTION_ENTER;
285 
286     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs());
287 
288     if (m_minMaxQpControlEnabled)
289     {
290         //if Min Max QP is on disable Frame Panic Mode
291         m_enableFramePanicMode = false;
292     }
293 
294     // This is an additional (the 5th) PAK pass for BRC panic mode. Enabled for the single pipe case only.
295     // Panic mode is not supported with Min/Max QP
296     if (m_brcEnabled && m_enableFramePanicMode && (false == m_hevcSeqParams->DisableHRDConformance) &&
297         (I_TYPE != m_hevcPicParams->CodingType) &&
298         (m_numPipe == 1))
299     {
300         m_numPasses++;
301     }
302 
303     m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
304 
305     if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
306         (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
307     {
308         uint8_t currRefIdx = m_hevcPicParams->CurrReconstructedPic.FrameIdx;
309         UpdateYUY2SurfaceInfo(m_refList[currRefIdx]->sRefBuffer, m_is10BitHevc);
310 
311         if (m_pictureCodingType != I_TYPE)
312         {
313             for (uint32_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
314             {
315                 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
316                 {
317                     continue;
318                 }
319                 uint8_t picIdx = m_picIdx[i].ucPicIdx;
320                 CODECHAL_ENCODE_ASSERT(picIdx < 127);
321 
322                 UpdateYUY2SurfaceInfo((m_refList[picIdx]->sRefBuffer), m_is10BitHevc);
323             }
324         }
325     }
326 
327     return eStatus;
328 }
329 
SetKernelParams(EncOperation encOperation,MHW_KERNEL_PARAM * kernelParams,uint32_t idx)330 MOS_STATUS CodechalEncHevcStateG12::SetKernelParams(
331     EncOperation      encOperation,
332     MHW_KERNEL_PARAM *kernelParams,
333     uint32_t          idx)
334 {
335     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
336 
337     kernelParams->iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
338     kernelParams->iIdCount     = 1;
339 
340     uint32_t curbeAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
341     switch (encOperation)
342     {
343     case ENC_MBENC:
344     {
345         switch (idx)
346         {
347         case MBENC_LCU32_KRNIDX:
348             kernelParams->iBTCount     = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
349             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU32_BTI), (size_t)curbeAlignment);
350             kernelParams->iBlockWidth  = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
351             kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
352             break;
353 
354         case MBENC_LCU64_KRNIDX:
355             kernelParams->iBTCount     = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
356             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU64_BTI), (size_t)curbeAlignment);
357             kernelParams->iBlockWidth  = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
358             kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
359             break;
360 
361         default:
362             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
363             return MOS_STATUS_INVALID_PARAMETER;
364         }
365     }
366     break;
367 
368     case ENC_BRC:
369     {
370         switch (idx)
371         {
372         case CODECHAL_HEVC_BRC_INIT:
373         case CODECHAL_HEVC_BRC_RESET:
374             kernelParams->iBTCount     = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
375             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRC_INITRESET_CURBE), (size_t)curbeAlignment);
376             kernelParams->iBlockWidth  = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
377             kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
378             break;
379 
380         case CODECHAL_HEVC_BRC_FRAME_UPDATE:
381             kernelParams->iBTCount     = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
382             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
383             kernelParams->iBlockWidth  = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
384             kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
385             break;
386 
387         case CODECHAL_HEVC_BRC_LCU_UPDATE:
388             kernelParams->iBTCount     = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
389             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
390             kernelParams->iBlockWidth  = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
391             kernelParams->iBlockHeight = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
392             break;
393 
394         default:
395             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
396             return MOS_STATUS_INVALID_PARAMETER;
397         }
398     }
399     break;
400 
401     default:
402         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
403         return MOS_STATUS_INVALID_PARAMETER;
404     }
405 
406     return eStatus;
407 }
408 
SetBindingTable(EncOperation encOperation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,uint32_t idx)409 MOS_STATUS CodechalEncHevcStateG12::SetBindingTable(
410     EncOperation                           encOperation,
411     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,
412     uint32_t                               idx)
413 {
414     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
415 
416     CODECHAL_ENCODE_CHK_NULL_RETURN(hevcEncBindingTable);
417 
418     MOS_ZeroMemory(hevcEncBindingTable, sizeof(*hevcEncBindingTable));
419 
420     switch (encOperation)
421     {
422     case ENC_MBENC:
423     {
424         switch (idx)
425         {
426         case MBENC_LCU32_KRNIDX:
427         case MBENC_LCU64_KRNIDX:
428             hevcEncBindingTable->dwNumBindingTableEntries  = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
429             hevcEncBindingTable->dwBindingTableStartOffset = MBENC_B_FRAME_BEGIN;
430             break;
431 
432         default:
433             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
434             return MOS_STATUS_INVALID_PARAMETER;
435         }
436     }
437     break;
438 
439     case ENC_BRC:
440     {
441         switch (idx)
442         {
443         case CODECHAL_HEVC_BRC_INIT:
444             hevcEncBindingTable->dwNumBindingTableEntries  = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
445             hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
446             break;
447 
448         case CODECHAL_HEVC_BRC_RESET:
449             hevcEncBindingTable->dwNumBindingTableEntries  = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
450             hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
451             break;
452 
453         case CODECHAL_HEVC_BRC_FRAME_UPDATE:
454             hevcEncBindingTable->dwNumBindingTableEntries  = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
455             hevcEncBindingTable->dwBindingTableStartOffset = BRC_UPDATE_BEGIN;
456             break;
457 
458         case CODECHAL_HEVC_BRC_LCU_UPDATE:
459             hevcEncBindingTable->dwNumBindingTableEntries  = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
460             hevcEncBindingTable->dwBindingTableStartOffset = BRC_LCU_UPDATE_BEGIN;
461             break;
462 
463         default:
464             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
465             return MOS_STATUS_INVALID_PARAMETER;
466         }
467     }
468     break;
469 
470     default:
471         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
472         return MOS_STATUS_INVALID_PARAMETER;
473     }
474 
475     for (uint32_t i = 0; i < hevcEncBindingTable->dwNumBindingTableEntries; i++)
476     {
477         hevcEncBindingTable->dwBindingTableEntries[i] = i;
478     }
479 
480     return eStatus;
481 }
482 
AllocateEncResources()483 MOS_STATUS CodechalEncHevcStateG12::AllocateEncResources()
484 {
485     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
486 
487     CODECHAL_ENCODE_FUNCTION_ENTER;
488 
489     // Surfaces used by I & B Kernels
490     uint32_t width = 0, height = 0;
491     uint32_t size = 0;
492 
493     MEDIA_WA_TABLE* waTable = m_osInterface->pfnGetWaTable(m_osInterface);
494     uint32_t memType = (MEDIA_IS_WA(waTable, WaForceAllocateLML4)) ? MOS_MEMPOOL_DEVICEMEMORY : 0;
495 
496     if (!m_useMdf)
497     {
498         // Intermediate CU Record surface
499         if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu32.OsResource))
500         {
501             width  = m_widthAlignedLcu32;
502             height = m_heightAlignedLcu32 >> 1;
503 
504             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
505                 &m_intermediateCuRecordSurfaceLcu32,
506                 width,
507                 height,
508                 "Intermediate CU record surface",
509                 MOS_TILE_Y));
510         }
511 
512         // Scratch Surface for I-kernel
513         if (Mos_ResourceIsNull(&m_scratchSurface.OsResource))
514         {
515             width  = m_widthAlignedLcu32 >> 3;
516             height = m_heightAlignedLcu32 >> 5;
517 
518             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
519                 &m_scratchSurface,
520                 width,
521                 height,
522                 "Scratch surface for I and B Kernels"));
523         }
524 
525         // CU based QP surface
526         if (Mos_ResourceIsNull(&m_16x16QpInputData.OsResource))
527         {
528             width  = MOS_ALIGN_CEIL(m_picWidthInMb, 64);
529             height = MOS_ALIGN_CEIL(m_picHeightInMb, 64);
530 
531             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
532                 &m_16x16QpInputData,
533                 width,
534                 height,
535                 "16x16 QP Data Input surface"));
536         }
537 
538         // Surfaces used by B Kernels
539         // Enc constant table for B LCU32
540         if (Mos_ResourceIsNull(&m_encConstantTableForB.sResource))
541         {
542             size = m_encConstantDataLutSize;
543 
544             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
545                 &m_encConstantTableForB,
546                 size,
547                 "Enc Constant Table surface For LCU32/LCU64"));
548         }
549 
550         //Debug surface
551         for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
552         {
553             if (Mos_ResourceIsNull(&m_debugSurface[i].sResource))
554             {
555                 size = m_debugSurfaceSize;
556 
557                 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
558                     &m_debugSurface[i],
559                     size,
560                     "Kernel debug surface"));
561             }
562         }
563     }
564 
565     // LCU Level Input Data
566     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
567     {
568         if (Mos_ResourceIsNull(&m_lcuLevelInputDataSurface[i].OsResource))
569         {
570             width  = 16 * ((m_widthAlignedMaxLcu >> 6) << 1);
571             height = ((m_heightAlignedMaxLcu >> 6) << 1);
572 
573             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
574                 &m_lcuLevelInputDataSurface[i],
575                 width,
576                 height,
577                 "Lcu Level Data Input surface",
578                 MOS_TILE_LINEAR));
579         }
580     }
581 
582     m_brcInputForEncKernelBuffer = nullptr;
583 
584     //Current Picture Y with Reconstructed boundary pixels
585     if (Mos_ResourceIsNull(&m_currPicWithReconBoundaryPix.OsResource))
586     {
587         width  = m_widthAlignedLcu32;
588         height = m_heightAlignedLcu32;
589 
590         if (m_isMaxLcu64)
591         {
592             width  = m_widthAlignedMaxLcu;
593             height = m_heightAlignedMaxLcu;
594         }
595 
596         uint32_t aligned_height = (uint32_t) (height * m_alignReconFactor);
597         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
598             &m_currPicWithReconBoundaryPix,
599             width,
600             aligned_height,
601             "Current Picture Y with Reconstructed Boundary Pixels surface",
602             memType));
603     }
604 
605     // Encoder History Input Surface
606     if (Mos_ResourceIsNull(&m_encoderHistoryInputBuffer.OsResource))
607     {
608         width  = 32 * ((m_widthAlignedMaxLcu >> 6) << 1);
609         height = ((m_heightAlignedMaxLcu >> 6) << 1);
610 
611         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
612             &m_encoderHistoryInputBuffer,
613             width,
614             height,
615             "Encoder History Input surface"));
616     }
617 
618     // Encoder History Output Surface
619     if (Mos_ResourceIsNull(&m_encoderHistoryOutputBuffer.OsResource))
620     {
621         width  = 32 * ((m_widthAlignedMaxLcu >> 6) << 1);
622         height = ((m_heightAlignedMaxLcu >> 6) << 1);
623 
624         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
625             &m_encoderHistoryOutputBuffer,
626             width,
627             height,
628             "Encoder History Output surface"));
629     }
630 
631     if (m_hmeSupported && !m_useMdf)
632     {
633         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
634         // BRC Distortion surface
635         if (Mos_ResourceIsNull(&m_brcBuffers.sMeBrcDistortionBuffer.OsResource))
636         {
637             width  = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
638             height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
639 
640             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
641                 &m_brcBuffers.sMeBrcDistortionBuffer,
642                 width,
643                 height,
644                 "Brc Distortion surface Buffer"));
645         }
646 
647         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
648     }
649 
650     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
651     {
652         if (Mos_ResourceIsNull(&m_encBCombinedBuffer1[i].sResource))
653         {
654             size = sizeof(MBENC_COMBINED_BUFFER1);
655 
656             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
657                 &m_encBCombinedBuffer1[i],
658                 size,
659                 "Enc B combined buffer1"));
660 
661             MOS_LOCK_PARAMS lockFlags;
662             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
663             lockFlags.WriteOnly = 1;
664             uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
665                 m_osInterface,
666                 &m_encBCombinedBuffer1[i].sResource,
667                 &lockFlags);
668             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
669 
670             MOS_ZeroMemory(data, size);
671 
672             m_osInterface->pfnUnlockResource(
673                 m_osInterface,
674                 &m_encBCombinedBuffer1[i].sResource);
675         }
676     }
677 
678     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
679     {
680         if (Mos_ResourceIsNull(&m_encBCombinedBuffer2[i].sResource))
681         {
682             uint32_t               numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
683             MBENC_COMBINED_BUFFER2 fixedBuf;
684 
685             m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
686             m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
687 
688             size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize;
689 
690             m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
691             m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
692 
693             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
694                 &m_encBCombinedBuffer2[i],
695                 size,
696                 "Enc B combined buffer2"));
697 
698             MOS_LOCK_PARAMS lockFlags;
699             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
700             lockFlags.WriteOnly = 1;
701             uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
702                 m_osInterface,
703                 &m_encBCombinedBuffer2[i].sResource,
704                 &lockFlags);
705             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
706 
707             MOS_ZeroMemory(data, size);
708 
709             m_osInterface->pfnUnlockResource(
710                 m_osInterface,
711                 &m_encBCombinedBuffer2[i].sResource);
712         }
713     }
714 
715     return eStatus;
716 }
717 
FreeEncResources()718 MOS_STATUS CodechalEncHevcStateG12::FreeEncResources()
719 {
720     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
721 
722     CODECHAL_ENCODE_FUNCTION_ENTER;
723 
724     MOS_DeleteArray(m_mbEncKernelStates);
725     m_mbEncKernelStates = nullptr;
726     MOS_FreeMemory(m_mbEncKernelBindingTable);
727     m_mbEncKernelBindingTable = nullptr;
728 
729     MOS_DeleteArray(m_brcKernelStates);
730     m_brcKernelStates = nullptr;
731     MOS_FreeMemory(m_brcKernelBindingTable);
732     m_brcKernelBindingTable = nullptr;
733 
734     HmeParams hmeParams;
735     MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
736     hmeParams.presMvAndDistortionSumSurface = &m_mvAndDistortionSumSurface.sResource;
737     CODECHAL_ENCODE_CHK_STATUS_RETURN(DestroyMEResources(&hmeParams));
738 
739     // Surfaces used by I kernel
740     // Release Intermediate CU Record Surface
741     m_osInterface->pfnFreeResource(
742         m_osInterface,
743         &m_intermediateCuRecordSurfaceLcu32.OsResource);
744 
745     // Release Scratch Surface for I-kernel
746     m_osInterface->pfnFreeResource(
747         m_osInterface,
748         &m_scratchSurface.OsResource);
749 
750     // Release CU based QP surface
751     m_osInterface->pfnFreeResource(
752         m_osInterface,
753         &m_16x16QpInputData.OsResource);
754 
755     // Release LCU Level Input Data
756     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
757     {
758         m_osInterface->pfnFreeResource(
759             m_osInterface,
760             &m_lcuLevelInputDataSurface[i].OsResource);
761     }
762 
763     // Release Current Picture Y with Reconstructed boundary pixels surface
764     m_osInterface->pfnFreeResource(
765         m_osInterface,
766         &m_currPicWithReconBoundaryPix.OsResource);
767 
768     // Release Encoder History Input Data
769     m_osInterface->pfnFreeResource(
770         m_osInterface,
771         &m_encoderHistoryInputBuffer.OsResource);
772 
773     // Release Encoder History Output Data
774     m_osInterface->pfnFreeResource(
775         m_osInterface,
776         &m_encoderHistoryOutputBuffer.OsResource);
777 
778     // Release Debug surface
779     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
780     {
781         m_osInterface->pfnFreeResource(
782             m_osInterface,
783             &m_debugSurface[i].sResource);
784     }
785 
786     // Surfaces used by B Kernels
787     // Enc constant table for B LCU32
788     m_osInterface->pfnFreeResource(
789         m_osInterface,
790         &m_encConstantTableForB.sResource);
791 
792     CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources());
793 
794     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
795     {
796         m_osInterface->pfnFreeResource(
797             m_osInterface,
798             &m_encBCombinedBuffer1[i].sResource);
799     }
800 
801     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
802     {
803         m_osInterface->pfnFreeResource(
804             m_osInterface,
805             &m_encBCombinedBuffer2[i].sResource);
806     }
807 
808     if (m_swScoreboard)
809     {
810         MOS_FreeMemory(m_swScoreboard);
811         m_swScoreboard = nullptr;
812     }
813 
814     if (m_numDelay)
815     {
816         m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
817     }
818 
819     return eStatus;
820 }
821 
AllocateMeResources()822 MOS_STATUS CodechalEncHevcStateG12::AllocateMeResources()
823 {
824     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
825 
826     CODECHAL_ENCODE_FUNCTION_ENTER;
827 
828     // Mv and Distortion Summation Surface
829     if (Mos_ResourceIsNull(&m_mvAndDistortionSumSurface.sResource))
830     {
831         uint32_t size = m_mvdistSummationSurfSize;
832 
833         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
834             &m_mvAndDistortionSumSurface,
835             size,
836             "Mv and Distortion Summation surface"));
837 
838         // Initialize the surface to zero for now till HME is updated to output the data into this surface
839         MOS_LOCK_PARAMS lockFlags;
840         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
841         lockFlags.WriteOnly = 1;
842         uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
843             m_osInterface,
844             &m_mvAndDistortionSumSurface.sResource,
845             &lockFlags);
846         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
847 
848         MOS_ZeroMemory(data, size);
849 
850         m_osInterface->pfnUnlockResource(
851             m_osInterface,
852             &m_mvAndDistortionSumSurface.sResource);
853     }
854 
855     return eStatus;
856 }
857 
FreeMeResources()858 MOS_STATUS CodechalEncHevcStateG12::FreeMeResources()
859 {
860     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
861 
862     CODECHAL_ENCODE_FUNCTION_ENTER;
863 
864     m_osInterface->pfnFreeResource(
865         m_osInterface,
866         &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);
867 
868     return eStatus;
869 }
870 
AllocatePakResources()871 MOS_STATUS CodechalEncHevcStateG12::AllocatePakResources()
872 {
873     CODECHAL_ENCODE_FUNCTION_ENTER;
874 
875     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
876 
877     uint32_t mvt_size        = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6) * ((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
878     uint32_t mvtb_size       = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5) * ((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
879     m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
880 
881     const uint32_t minLcuSize        = 16;
882     const uint32_t picWidthInMinLCU  = MOS_ROUNDUP_DIVIDE(m_frameWidth, minLcuSize);   //assume smallest LCU to get max width
883     const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, minLcuSize);  //assume smallest LCU to get max height
884 
885     MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
886     MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
887     hcpBufSizeParam.ucMaxBitDepth  = m_bitDepth;
888     hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
889     // We should move the buffer allocation to picture level if the size is dependent on LCU size
890     hcpBufSizeParam.dwCtbLog2SizeY = 6;  //assume Max LCU size
891     hcpBufSizeParam.dwPicWidth     = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
892     hcpBufSizeParam.dwPicHeight    = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
893 
894     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
895     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
896     allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
897     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
898     allocParamsForBufferLinear.Format   = Format_Buffer;
899 
900     // Deblocking Filter Row Store Scratch data surface
901     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
902         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
903         &hcpBufSizeParam);
904 
905     if (eStatus != MOS_STATUS_SUCCESS)
906     {
907         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
908         return eStatus;
909     }
910 
911     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
912     allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
913 
914     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
915         m_osInterface,
916         &allocParamsForBufferLinear,
917         &m_resDeblockingFilterRowStoreScratchBuffer);
918 
919     if (eStatus != MOS_STATUS_SUCCESS)
920     {
921         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
922         return eStatus;
923     }
924 
925     // Deblocking Filter Tile Row Store Scratch data surface
926     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
927         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
928         &hcpBufSizeParam);
929 
930     if (eStatus != MOS_STATUS_SUCCESS)
931     {
932         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
933         return eStatus;
934     }
935 
936     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
937     allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
938 
939     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
940         m_osInterface,
941         &allocParamsForBufferLinear,
942         &m_resDeblockingFilterTileRowStoreScratchBuffer);
943 
944     if (eStatus != MOS_STATUS_SUCCESS)
945     {
946         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
947         return eStatus;
948     }
949 
950     // Deblocking Filter Column Row Store Scratch data surface
951     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
952         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
953         &hcpBufSizeParam);
954 
955     if (eStatus != MOS_STATUS_SUCCESS)
956     {
957         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
958         return eStatus;
959     }
960 
961     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
962     allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
963 
964     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
965         m_osInterface,
966         &allocParamsForBufferLinear,
967         &m_resDeblockingFilterColumnRowStoreScratchBuffer);
968 
969     if (eStatus != MOS_STATUS_SUCCESS)
970     {
971         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
972         return eStatus;
973     }
974 
975     // Metadata Line buffer
976     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
977         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
978         &hcpBufSizeParam);
979 
980     if (eStatus != MOS_STATUS_SUCCESS)
981     {
982         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
983         return eStatus;
984     }
985 
986     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
987     allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
988 
989     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
990         m_osInterface,
991         &allocParamsForBufferLinear,
992         &m_resMetadataLineBuffer);
993 
994     if (eStatus != MOS_STATUS_SUCCESS)
995     {
996         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
997         return eStatus;
998     }
999 
1000     // Metadata Tile Line buffer
1001     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1002         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
1003         &hcpBufSizeParam);
1004 
1005     if (eStatus != MOS_STATUS_SUCCESS)
1006     {
1007         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
1008         return eStatus;
1009     }
1010 
1011     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
1012     allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
1013 
1014     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1015         m_osInterface,
1016         &allocParamsForBufferLinear,
1017         &m_resMetadataTileLineBuffer);
1018 
1019     if (eStatus != MOS_STATUS_SUCCESS)
1020     {
1021         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
1022         return eStatus;
1023     }
1024 
1025     // Metadata Tile Column buffer
1026     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1027         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
1028         &hcpBufSizeParam);
1029 
1030     if (eStatus != MOS_STATUS_SUCCESS)
1031     {
1032         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
1033         return eStatus;
1034     }
1035 
1036     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
1037     allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
1038 
1039     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1040         m_osInterface,
1041         &allocParamsForBufferLinear,
1042         &m_resMetadataTileColumnBuffer);
1043 
1044     if (eStatus != MOS_STATUS_SUCCESS)
1045     {
1046         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
1047         return eStatus;
1048     }
1049 
1050     // SAO Line buffer
1051     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1052         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
1053         &hcpBufSizeParam);
1054 
1055     if (eStatus != MOS_STATUS_SUCCESS)
1056     {
1057         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
1058         return eStatus;
1059     }
1060 
1061     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
1062     allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
1063 
1064     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1065         m_osInterface,
1066         &allocParamsForBufferLinear,
1067         &m_resSaoLineBuffer);
1068 
1069     if (eStatus != MOS_STATUS_SUCCESS)
1070     {
1071         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
1072         return eStatus;
1073     }
1074 
1075     // SAO Tile Line buffer
1076     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1077         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
1078         &hcpBufSizeParam);
1079 
1080     if (eStatus != MOS_STATUS_SUCCESS)
1081     {
1082         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
1083         return eStatus;
1084     }
1085 
1086     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
1087     allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
1088 
1089     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1090         m_osInterface,
1091         &allocParamsForBufferLinear,
1092         &m_resSaoTileLineBuffer);
1093 
1094     if (eStatus != MOS_STATUS_SUCCESS)
1095     {
1096         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
1097         return eStatus;
1098     }
1099 
1100     // SAO Tile Column buffer
1101     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1102         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
1103         &hcpBufSizeParam);
1104 
1105     if (eStatus != MOS_STATUS_SUCCESS)
1106     {
1107         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
1108         return eStatus;
1109     }
1110 
1111     allocParamsForBufferLinear.dwBytes  = hcpBufSizeParam.dwBufferSize;
1112     allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
1113 
1114     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1115         m_osInterface,
1116         &allocParamsForBufferLinear,
1117         &m_resSaoTileColumnBuffer);
1118 
1119     if (eStatus != MOS_STATUS_SUCCESS)
1120     {
1121         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
1122         return eStatus;
1123     }
1124 
1125     // Lcu ILDB StreamOut buffer
1126     // Allocate the buffer size
1127     // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
1128     allocParamsForBufferLinear.dwBytes  = CODECHAL_CACHELINE_SIZE;
1129     allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
1130 
1131     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1132         m_osInterface,
1133         &allocParamsForBufferLinear,
1134         &m_resLcuIldbStreamOutBuffer);
1135 
1136     if (eStatus != MOS_STATUS_SUCCESS)
1137     {
1138         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
1139         return eStatus;
1140     }
1141 
1142     // Lcu Base Address buffer
1143     // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
1144     // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
1145     // Align to page for HUC requirement
1146     uint32_t maxLcu                     = picWidthInMinLCU * picHeightInMinLCU;
1147     allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1148     allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
1149 
1150     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1151         m_osInterface,
1152         &allocParamsForBufferLinear,
1153         &m_resLcuBaseAddressBuffer);
1154 
1155     if (eStatus != MOS_STATUS_SUCCESS)
1156     {
1157         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
1158         return eStatus;
1159     }
1160 
1161     // SAO StreamOut buffer
1162     // size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * 16
1163     uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
1164     //extra added size to cover tile enabled case, per tile width aligned to 4.  20: max tile column No.
1165     size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
1166     allocParamsForBufferLinear.dwBytes  = size;
1167     allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
1168 
1169     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1170         m_osInterface,
1171         &allocParamsForBufferLinear,
1172         &m_resSaoStreamOutBuffer);
1173 
1174     if (eStatus != MOS_STATUS_SUCCESS)
1175     {
1176         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
1177         return eStatus;
1178     }
1179 
1180     uint32_t maxTileNumber = (MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE) *
1181                              (MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE);
1182 
1183     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1184     allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
1185     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1186     allocParamsForBufferLinear.Format   = Format_Buffer;
1187 
1188     // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP pipe buffer address command
1189     allocParamsForBufferLinear.dwBytes  = m_sizeOfHcpPakFrameStats * maxTileNumber;  //Each tile has 8 cache size bytes of data
1190     allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
1191 
1192     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1193         m_osInterface,
1194         &allocParamsForBufferLinear,
1195         &m_resFrameStatStreamOutBuffer));
1196 
1197     // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
1198     // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1199     uint32_t frameWidthInCus            = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
1200     uint32_t frameHeightInCus           = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
1201     size                                = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
1202     allocParamsForBufferLinear.dwBytes  = size;
1203     allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
1204 
1205     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1206         m_osInterface,
1207         &allocParamsForBufferLinear,
1208         &m_resPakcuLevelStreamoutData.sResource));
1209     m_resPakcuLevelStreamoutData.dwSize = size;
1210     CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
1211 
1212     // Allocate SSE Source Pixel Row Store Buffer. Implementation for each tile column is shown as below:
1213     //   tileWidthInLCU = ((tileWidthInLCU+3) * BYTES_PER_CACHE_LINE)*(4+4) ; tileWidthInLCU <<= 1; // double the size as RTL treats it as 10 bit data
1214     // Here, we consider each LCU column is one tile column.
1215 
1216     m_sizeOfSseSrcPixelRowStoreBufferPerLcu = (CODECHAL_CACHELINE_SIZE * (4 + 4)) << 1;                          //size per LCU plus 10-bit
1217     size                                    = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (picWidthInMinLCU + 3);  // already aligned to cacheline size
1218     allocParamsForBufferLinear.dwBytes      = size;
1219     allocParamsForBufferLinear.pBufName     = "SseSrcPixelRowStoreBuffer";
1220 
1221     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1222         m_osInterface,
1223         &allocParamsForBufferLinear,
1224         &m_resSseSrcPixelRowStoreBuffer));
1225 
1226     // SAO Row Store buffer, HSAO
1227     // Aligned to 4 for each tile column
1228     uint32_t maxTileColumn              = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
1229     allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(picWidthInMinLCU + 3 * maxTileColumn, 4) * 16;
1230     allocParamsForBufferLinear.pBufName = "SaoRowStoreBuffer";
1231 
1232     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1233         m_osInterface,
1234         &allocParamsForBufferLinear,
1235         &m_SAORowStoreBuffer);
1236 
1237     if (eStatus != MOS_STATUS_SUCCESS)
1238     {
1239         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO row store Buffer.");
1240         return eStatus;
1241     }
1242 
1243     //HCP scalability Sync buffer
1244     size                                = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
1245     allocParamsForBufferLinear.dwBytes  = size;
1246     allocParamsForBufferLinear.pBufName = "GEN12 Hcp scalability Sync buffer ";
1247 
1248     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1249         m_osInterface,
1250         &allocParamsForBufferLinear,
1251         &m_resHcpScalabilitySyncBuffer.sResource));
1252     m_resHcpScalabilitySyncBuffer.dwSize = size;
1253 
1254     // create the tile coding state parameters
1255     m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12) * maxTileNumber);
1256 
1257     if (m_enableHWSemaphore)
1258     {
1259         // Create the HW sync objects which will be used by each reference frame and BRC in GEN12
1260         allocParamsForBufferLinear.dwBytes  = sizeof(uint32_t);
1261         allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
1262 
1263         MOS_LOCK_PARAMS lockFlagsWriteOnly;
1264         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1265         lockFlagsWriteOnly.WriteOnly = 1;
1266 
1267         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1268         {
1269             eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1270                 m_osInterface,
1271                 &allocParamsForBufferLinear,
1272                 &m_resBrcSemaphoreMem[i].sResource);
1273             m_resBrcSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
1274             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create BRC HW Semaphore Memory.");
1275 
1276             uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1277                 m_osInterface,
1278                 &m_resBrcSemaphoreMem[i].sResource,
1279                 &lockFlagsWriteOnly);
1280 
1281             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1282 
1283             *data = 1;
1284 
1285             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1286                 m_osInterface,
1287                 &m_resBrcSemaphoreMem[i].sResource));
1288         }
1289 
1290         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1291             m_osInterface,
1292             &allocParamsForBufferLinear,
1293             &m_resPipeStartSemaMem);
1294         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe start sync HW semaphore.");
1295 
1296         uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1297             m_osInterface,
1298             &m_resPipeStartSemaMem,
1299             &lockFlagsWriteOnly);
1300 
1301         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1302         *data = 0;
1303         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1304             m_osInterface,
1305             &m_resPipeStartSemaMem));
1306 
1307         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1308             m_osInterface,
1309             &allocParamsForBufferLinear,
1310             &m_resPipeCompleteSemaMem);
1311         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe completion sync HW semaphore.");
1312 
1313         data = (uint32_t *)m_osInterface->pfnLockResource(
1314             m_osInterface,
1315             &m_resPipeCompleteSemaMem,
1316             &lockFlagsWriteOnly);
1317 
1318         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1319         *data = 0;
1320         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1321             m_osInterface,
1322             &m_resPipeCompleteSemaMem));
1323     }
1324 
1325     if (m_hucPakStitchEnabled)
1326     {
1327         if (Mos_ResourceIsNull(&m_resHucStatus2Buffer))
1328         {
1329             // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1330             allocParamsForBufferLinear.dwBytes  = sizeof(uint64_t);
1331             allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer";
1332 
1333             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1334                 m_osInterface->pfnAllocateResource(
1335                     m_osInterface,
1336                     &allocParamsForBufferLinear,
1337                     &m_resHucStatus2Buffer),
1338                 "%s: Failed to allocate HUC STATUS 2 Buffer\n",
1339                 __FUNCTION__);
1340         }
1341 
1342         uint8_t *data;
1343 
1344         // Pak stitch DMEM
1345         allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE);
1346         allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
1347         auto numOfPasses                    = CODECHAL_DP_MAX_NUM_BRC_PASSES;
1348         for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
1349         {
1350             for (auto i = 0; i < numOfPasses; i++)
1351             {
1352                 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1353                     m_osInterface,
1354                     &allocParamsForBufferLinear,
1355                     &m_resHucPakStitchDmemBuffer[j][i]);
1356 
1357                 if (eStatus != MOS_STATUS_SUCCESS)
1358                 {
1359                     CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK Stitch Dmem Buffer.");
1360                     return eStatus;
1361                 }
1362             }
1363         }
1364         // BRC Data Buffer
1365         allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1366         allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
1367 
1368         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1369             m_osInterface,
1370             &allocParamsForBufferLinear,
1371             &m_resBrcDataBuffer);
1372 
1373         if (eStatus != MOS_STATUS_SUCCESS)
1374         {
1375             CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate BRC Data Buffer Buffer.");
1376             return eStatus;
1377         }
1378 
1379         MOS_LOCK_PARAMS lockFlags;
1380         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1381         lockFlags.WriteOnly = 1;
1382 
1383         data = (uint8_t *)m_osInterface->pfnLockResource(
1384             m_osInterface,
1385             &m_resBrcDataBuffer,
1386             &lockFlags);
1387 
1388         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1389 
1390         MOS_ZeroMemory(
1391             data,
1392             allocParamsForBufferLinear.dwBytes);
1393 
1394         m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
1395         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1396         {
1397             for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1398             {
1399                 // HuC stitching Data buffer
1400                 allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
1401                 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
1402                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1403                     m_osInterface->pfnAllocateResource(
1404                         m_osInterface,
1405                         &allocParamsForBufferLinear,
1406                         &m_resHucStitchDataBuffer[i][j]));
1407 
1408                 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1409                 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1410                 lockFlagsWriteOnly.WriteOnly = 1;
1411 
1412                 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
1413                     m_osInterface,
1414                     &m_resHucStitchDataBuffer[i][j],
1415                     &lockFlagsWriteOnly);
1416                 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
1417                 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
1418                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1419             }
1420         }
1421 
1422         //Second level BB for huc stitching cmd
1423         MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
1424         m_HucStitchCmdBatchBuffer.bSecondLevel = true;
1425         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1426             m_osInterface,
1427             &m_HucStitchCmdBatchBuffer,
1428             nullptr,
1429             m_hwInterface->m_HucStitchCmdBatchBufferSize));
1430     }
1431 
1432     // Pak obj and CU records for skip frame
1433     uint32_t mbCodeSize = m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE;  // Must reserve at least 8 cachelines after MI_BATCH_BUFFER_END_CMD since HW prefetch max 8 cachelines from BB everytime
1434 
1435     MOS_ALLOC_GFXRES_PARAMS allocParams;
1436     MOS_ZeroMemory(&allocParams, sizeof(allocParams));
1437     allocParams.Type     = MOS_GFXRES_BUFFER;
1438     allocParams.Format   = Format_Buffer;
1439     allocParams.TileType = MOS_TILE_LINEAR;
1440     allocParams.dwBytes  = mbCodeSize;
1441     allocParams.pBufName = "skipFrameMbCodeSurface";
1442 
1443     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1444         m_osInterface,
1445         &allocParams,
1446         &m_skipFrameInfo.m_resMbCodeSkipFrameSurface);
1447     if (eStatus != MOS_STATUS_SUCCESS)
1448     {
1449         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK object buffer for skip frame");
1450         return eStatus;
1451     }
1452 
1453     if (m_numDelay)
1454     {
1455         allocParamsForBufferLinear.dwBytes  = sizeof(uint32_t);
1456         allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1457 
1458         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1459                                                       m_osInterface,
1460                                                       &allocParamsForBufferLinear,
1461                                                       &m_resDelayMinus),
1462             "Failed to allocate delay minus memory.");
1463 
1464         uint8_t *       data;
1465         MOS_LOCK_PARAMS lockFlags;
1466         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1467         lockFlags.WriteOnly = 1;
1468         data                = (uint8_t *)m_osInterface->pfnLockResource(
1469             m_osInterface,
1470             &m_resDelayMinus,
1471             &lockFlags);
1472 
1473         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1474 
1475         MOS_ZeroMemory(data, sizeof(uint32_t));
1476 
1477         m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1478     }
1479 
1480     return eStatus;
1481 }
1482 
FreePakResources()1483 MOS_STATUS CodechalEncHevcStateG12::FreePakResources()
1484 {
1485     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1486 
1487     CODECHAL_ENCODE_FUNCTION_ENTER;
1488 
1489     // Release Frame Statistics Streamout Data Destination Buffer
1490     m_osInterface->pfnFreeResource(
1491         m_osInterface,
1492         &m_resFrameStatStreamOutBuffer);
1493 
1494     // PAK CU Level Stream out buffer
1495     m_osInterface->pfnFreeResource(
1496         m_osInterface,
1497         &m_resPakcuLevelStreamoutData.sResource);
1498 
1499     // Release SSE Source Pixel Row Store Buffer
1500     m_osInterface->pfnFreeResource(
1501         m_osInterface,
1502         &m_resSseSrcPixelRowStoreBuffer);
1503 
1504     // Release Hcp scalability Sync buffer
1505     m_osInterface->pfnFreeResource(
1506         m_osInterface,
1507         &m_resHcpScalabilitySyncBuffer.sResource);
1508 
1509     m_osInterface->pfnFreeResource(
1510         m_osInterface,
1511         &m_resPakcuLevelStreamoutData.sResource);
1512 
1513     m_osInterface->pfnFreeResource(
1514         m_osInterface,
1515         &m_resPakSliceLevelStreamoutData.sResource);
1516 
1517     m_osInterface->pfnFreeResource(
1518         m_osInterface,
1519         &m_SAORowStoreBuffer);
1520 
1521     m_osInterface->pfnFreeResource(
1522         m_osInterface,
1523         &m_skipFrameInfo.m_resMbCodeSkipFrameSurface);
1524 
1525     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1526     {
1527         m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1528     }
1529     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1530     {
1531         m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1532     }
1533     m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1534 
1535     MOS_FreeMemory(m_tileParams);
1536 
1537     if (m_useVirtualEngine)
1538     {
1539         for (uint32_t i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1540         {
1541             for (uint32_t j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1542             {
1543                 for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1544                 {
1545                     PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1546                     if (cmdBuffer->pCmdBase)
1547                     {
1548                         m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1549                     }
1550                     m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1551                 }
1552             }
1553         }
1554     }
1555 
1556     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1557     {
1558         auto sync = &m_refSync[i];
1559 
1560         if (!Mos_ResourceIsNull(&sync->resSyncObject))
1561         {
1562             // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1563             if (sync->uiSemaphoreObjCount || sync->bInUsed)
1564             {
1565                 MOS_SYNC_PARAMS syncParams  = g_cInitSyncParams;
1566                 syncParams.GpuContext       = m_renderContext;
1567                 syncParams.presSyncResource = &sync->resSyncObject;
1568                 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1569                 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1570             }
1571         }
1572         m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1573     }
1574 
1575     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1576     {
1577         m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcSemaphoreMem[i].sResource);
1578     }
1579     m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem);
1580     m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeCompleteSemaMem);
1581 
1582     if (m_hucPakStitchEnabled)
1583     {
1584         m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer);
1585         m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1586 
1587         for (int i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1588         {
1589             for (int j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1590             {
1591                 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[i][j]);
1592                 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1593             }
1594         }
1595         Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
1596     }
1597     return CodechalEncHevcState::FreePakResources();
1598 }
1599 
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)1600 MOS_STATUS CodechalEncHevcStateG12::GetKernelHeaderAndSize(
1601     void *       binary,
1602     EncOperation operation,
1603     uint32_t     krnStateIdx,
1604     void *       krnHeader,
1605     uint32_t *   krnSize)
1606 {
1607     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1608 
1609     CODECHAL_ENCODE_FUNCTION_ENTER;
1610 
1611     CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
1612     CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
1613     CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
1614 
1615     PCODECHAL_HEVC_KERNEL_HEADER kernelHeaderTable = (PCODECHAL_HEVC_KERNEL_HEADER)binary;
1616 
1617     PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
1618     switch (operation)
1619     {
1620     case ENC_MBENC:
1621     {
1622         switch (krnStateIdx)
1623         {
1624         case MBENC_LCU32_KRNIDX:
1625             currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU32;
1626             break;
1627 
1628         case MBENC_LCU64_KRNIDX:
1629             currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU64;
1630             break;
1631 
1632         default:
1633             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
1634             return MOS_STATUS_INVALID_PARAMETER;
1635         }
1636     }
1637     break;
1638 
1639     case ENC_BRC:
1640     {
1641         switch (krnStateIdx)
1642         {
1643         case CODECHAL_HEVC_BRC_INIT:
1644             currKrnHeader = &kernelHeaderTable->HEVC_brc_init;
1645             break;
1646 
1647         case CODECHAL_HEVC_BRC_RESET:
1648             currKrnHeader = &kernelHeaderTable->HEVC_brc_reset;
1649             break;
1650 
1651         case CODECHAL_HEVC_BRC_FRAME_UPDATE:
1652             currKrnHeader = &kernelHeaderTable->HEVC_brc_update;
1653             break;
1654 
1655         case CODECHAL_HEVC_BRC_LCU_UPDATE:
1656             currKrnHeader = &kernelHeaderTable->HEVC_brc_lcuqp;
1657             break;
1658 
1659         default:
1660             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested, krnStateIdx=%d", krnStateIdx);
1661             return MOS_STATUS_INVALID_PARAMETER;
1662         }
1663         break;
1664     }
1665 
1666     default:
1667         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
1668         return MOS_STATUS_INVALID_PARAMETER;
1669     }
1670 
1671     *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
1672 
1673     PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1);
1674     PCODECHAL_KERNEL_HEADER invalidEntry  = &(kernelHeaderTable->HEVC_brc_lcuqp) + 1;
1675     uint32_t                nextKrnOffset = *krnSize;
1676     if (nextKrnHeader < invalidEntry)
1677     {
1678         nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
1679     }
1680     *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1681 
1682     return eStatus;
1683 }
1684 
InitKernelStateMbEnc()1685 MOS_STATUS CodechalEncHevcStateG12::InitKernelStateMbEnc()
1686 {
1687     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1688 
1689     CODECHAL_ENCODE_FUNCTION_ENTER;
1690 
1691     PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1692     m_numMbEncEncKrnStates                       = MBENC_NUM_KRN;
1693 
1694     m_mbEncKernelStates =
1695         MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
1696     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
1697 
1698     m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1699         sizeof(GenericBindingTable) * m_numMbEncEncKrnStates);
1700     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
1701 
1702     PMHW_KERNEL_STATE kernelStatePtr = m_mbEncKernelStates;
1703 
1704     for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++)
1705     {
1706         auto                   kernelSize = m_combinedKernelSize;
1707         CODECHAL_KERNEL_HEADER currKrnHeader;
1708         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1709             m_kernelBinary,
1710             ENC_MBENC,
1711             krnStateIdx,
1712             &currKrnHeader,
1713             &kernelSize));
1714 
1715         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1716             ENC_MBENC,
1717             &kernelStatePtr->KernelParams,
1718             krnStateIdx));
1719 
1720         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1721             ENC_MBENC,
1722             &m_mbEncKernelBindingTable[krnStateIdx],
1723             krnStateIdx));
1724 
1725         kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1726         kernelStatePtr->KernelParams.pBinary =
1727             m_kernelBinary +
1728             (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1729         kernelStatePtr->KernelParams.iSize   = kernelSize;
1730         kernelStatePtr->dwCurbeOffset        = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1731         kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1732         kernelStatePtr->KernelParams.iSize   = kernelSize;
1733 
1734         CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1735             stateHeapInterface,
1736             kernelStatePtr->KernelParams.iBTCount,
1737             &kernelStatePtr->dwSshSize,
1738             &kernelStatePtr->dwBindingTableSize));
1739 
1740         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1741 
1742         kernelStatePtr++;
1743     }
1744 
1745     return eStatus;
1746 }
1747 
InitKernelStateBrc()1748 MOS_STATUS CodechalEncHevcStateG12::InitKernelStateBrc()
1749 {
1750     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1751 
1752     CODECHAL_ENCODE_FUNCTION_ENTER;
1753 
1754     PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1755     m_numBrcKrnStates                            = CODECHAL_HEVC_BRC_NUM;
1756 
1757     m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
1758     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
1759 
1760     m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1761         sizeof(GenericBindingTable) * m_numBrcKrnStates);
1762 
1763     PMHW_KERNEL_STATE kernelStatePtr = m_brcKernelStates;
1764 
1765     kernelStatePtr++;  // Skipping BRC_COARSE_INTRA as it not in Gen11
1766 
1767     // KrnStateIdx initialization starts at 1 as Gen11 does not support BRC_COARSE_INTRA kernel in BRC. It is part of the Combined Common Kernel
1768     for (uint32_t krnStateIdx = 1; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
1769     {
1770         auto                   kernelSize = m_combinedKernelSize;
1771         CODECHAL_KERNEL_HEADER currKrnHeader;
1772 
1773         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1774             m_kernelBinary,
1775             ENC_BRC,
1776             krnStateIdx,
1777             &currKrnHeader,
1778             (uint32_t *)&kernelSize));
1779 
1780         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1781             ENC_BRC,
1782             &kernelStatePtr->KernelParams,
1783             krnStateIdx));
1784 
1785         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1786             ENC_BRC,
1787             &m_brcKernelBindingTable[krnStateIdx],
1788             krnStateIdx));
1789 
1790         kernelStatePtr->dwCurbeOffset        = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1791         kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1792         kernelStatePtr->KernelParams.iSize   = kernelSize;
1793 
1794         CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1795             stateHeapInterface,
1796             kernelStatePtr->KernelParams.iBTCount,
1797             &kernelStatePtr->dwSshSize,
1798             &kernelStatePtr->dwBindingTableSize));
1799 
1800         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1801 
1802         kernelStatePtr++;
1803     }
1804 
1805     return eStatus;
1806 }
1807 
GetFrameBrcLevel()1808 MOS_STATUS CodechalEncHevcStateG12::GetFrameBrcLevel()
1809 {
1810     CODECHAL_ENCODE_FUNCTION_ENTER;
1811 
1812     //if L0/L1 both points to previous frame, then its LBD otherwise its is level 1 RA B.
1813     auto                               B_or_LDB_brclevel = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
1814     std::map<int, HEVC_BRC_FRAME_TYPE> codingtype_to_brclevel{
1815         {I_TYPE, HEVC_BRC_FRAME_TYPE_I},
1816         {P_TYPE, HEVC_BRC_FRAME_TYPE_P_OR_LB},
1817         {B_TYPE, B_or_LDB_brclevel},
1818         {B1_TYPE, HEVC_BRC_FRAME_TYPE_B1},
1819         {B2_TYPE, HEVC_BRC_FRAME_TYPE_B2}};
1820 
1821     //Both I or P/LDB type at same HierarchLevelPlus1
1822     auto                               intra_LDBFrame_to_Brclevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : HEVC_BRC_FRAME_TYPE_P_OR_LB;
1823     std::map<int, HEVC_BRC_FRAME_TYPE> hierchLevelPlus1_to_brclevel{
1824         {1, intra_LDBFrame_to_Brclevel},
1825         {2, HEVC_BRC_FRAME_TYPE_B},
1826         {3, HEVC_BRC_FRAME_TYPE_B1},
1827         {4, HEVC_BRC_FRAME_TYPE_B2}};
1828 
1829     if (m_hevcSeqParams->HierarchicalFlag && m_hevcSeqParams->GopRefDist > 1 && m_hevcSeqParams->GopRefDist <= 8)
1830     {
1831         if (m_hevcPicParams->HierarchLevelPlus1 > 0)  // LDB or RAB
1832         {
1833             m_currFrameBrcLevel = hierchLevelPlus1_to_brclevel.count(m_hevcPicParams->HierarchLevelPlus1) ? hierchLevelPlus1_to_brclevel[m_hevcPicParams->HierarchLevelPlus1] : HEVC_BRC_FRAME_TYPE_INVALID;
1834             //Invalid HierarchLevelPlus1 or LBD frames at level 3 eror check.
1835             if ((m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_INVALID) ||
1836                 (m_hevcSeqParams->LowDelayMode && m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_B2))
1837             {
1838                 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC_BRC_FRAME_TYPE_INVALID or LBD picture doesn't support Level 4\n");
1839                 return MOS_STATUS_INVALID_PARAMETER;
1840             }
1841         }
1842         else
1843         {
1844             if (!m_hevcSeqParams->LowDelayMode)  // RA B
1845             {
1846                 m_currFrameBrcLevel = codingtype_to_brclevel.count(m_pictureCodingType) ? codingtype_to_brclevel[m_pictureCodingType] : HEVC_BRC_FRAME_TYPE_INVALID;
1847                 //Invalid CodingType.
1848                 if (m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_INVALID)
1849                 {
1850                     CODECHAL_ENCODE_ASSERTMESSAGE("Invalid CodingType\n");
1851                     return MOS_STATUS_INVALID_PARAMETER;
1852                 }
1853             }
1854             else  // Low Delay mode: Flat case
1855             {
1856                 m_currFrameBrcLevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : HEVC_BRC_FRAME_TYPE_P_OR_LB;
1857             }
1858         }
1859     }
1860     else  // Flat B
1861     {
1862         m_currFrameBrcLevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : B_or_LDB_brclevel;
1863     }
1864 
1865     return MOS_STATUS_SUCCESS;
1866 }
1867 
GetMaxBtCount()1868 uint32_t CodechalEncHevcStateG12::GetMaxBtCount()
1869 {
1870     uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
1871 
1872     // BRC Init kernel
1873     uint32_t btCountPhase1 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount, btIdxAlignment);
1874 
1875     // SwScoreboard kernel
1876     uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_swScoreboardState->GetBTCount(), btIdxAlignment);
1877 
1878     // Csc+Ds+Conversion kernel
1879     btCountPhase2 += MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment);
1880 
1881     // Intra Distortion kernel
1882     if (m_intraDistKernel)
1883     {
1884         btCountPhase2 += MOS_ALIGN_CEIL(m_intraDistKernel->GetBTCount(), btIdxAlignment);
1885     }
1886 
1887     // HME 4x, 16x, 32x kernel
1888     if (m_hmeKernel)
1889     {
1890         btCountPhase2 += (MOS_ALIGN_CEIL(m_hmeKernel->GetBTCount(), btIdxAlignment) * 3);
1891     }
1892 
1893     // Weighted prediction kernel
1894     btCountPhase2 += MOS_ALIGN_CEIL(m_wpState->GetBTCount(), btIdxAlignment);
1895     uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1896                              MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1897                              MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU32_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1898 
1899     uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1900                              MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1901                              MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU64_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1902 
1903     uint32_t maxBtCount = MOS_MAX(btCountPhase1, btCountPhase2);
1904     maxBtCount          = MOS_MAX(maxBtCount, btCountPhase3);
1905     maxBtCount          = MOS_MAX(maxBtCount, btCountPhase4);
1906 
1907     return maxBtCount;
1908 }
1909 
CalcScaledDimensions()1910 MOS_STATUS CodechalEncHevcStateG12::CalcScaledDimensions()
1911 {
1912     return MOS_STATUS_SUCCESS;
1913 }
1914 
GetMaxRefFrames(uint8_t & maxNumRef0,uint8_t & maxNumRef1)1915 void CodechalEncHevcStateG12::GetMaxRefFrames(uint8_t &maxNumRef0, uint8_t &maxNumRef1)
1916 {
1917     maxNumRef0 = m_maxNumVmeL0Ref;
1918     maxNumRef1 = m_maxNumVmeL1Ref;
1919 
1920     return;
1921 }
1922 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1923 MOS_STATUS CodechalEncHevcStateG12::GetStatusReport(
1924     EncodeStatus *      encodeStatus,
1925     EncodeStatusReport *encodeStatusReport)
1926 {
1927     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1928 
1929     CODECHAL_ENCODE_FUNCTION_ENTER;
1930 
1931     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1932     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1933 
1934     if (encodeStatusReport->UsedVdBoxNumber <= 1)
1935     {
1936         return CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport);
1937     }
1938 
1939     PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1940 
1941     MOS_LOCK_PARAMS lockFlags;
1942     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1943     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
1944     HCPPakHWTileSizeRecord_G12 *tileStatusReport = (HCPPakHWTileSizeRecord_G12 *)m_osInterface->pfnLockResource(
1945         m_osInterface,
1946         &tileSizeStatusReport->sResource,
1947         &lockFlags);
1948     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1949 
1950     encodeStatusReport->CodecStatus                                      = CODECHAL_STATUS_SUCCESSFUL;
1951     encodeStatusReport->PanicMode                                        = false;
1952     encodeStatusReport->AverageQp                                        = 0;
1953     encodeStatusReport->QpY                                              = 0;
1954     encodeStatusReport->SuggestedQpYDelta                                = 0;
1955     encodeStatusReport->NumberPasses                                     = 1;
1956     encodeStatusReport->bitstreamSize                                    = 0;
1957     encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1958 
1959     uint32_t totalCU = 0;
1960     double   sumQp   = 0.0;
1961     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1962     {
1963         if (tileStatusReport[i].Length == 0)
1964         {
1965             encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1966             return eStatus;
1967         }
1968 
1969         encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1970         totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1971         sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1972     }
1973 
1974     encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses + 1;
1975     CODECHAL_ENCODE_VERBOSEMESSAGE("BRC Scalability Mode Exectued PAK Pass number: %d.\n", encodeStatusReport->NumberPasses);
1976 
1977     if (encodeStatusReport->bitstreamSize == 0 ||
1978         encodeStatusReport->bitstreamSize > m_bitstreamUpperBound)
1979     {
1980         encodeStatusReport->CodecStatus   = CODECHAL_STATUS_ERROR;
1981         encodeStatusReport->bitstreamSize = 0;
1982         CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!");
1983         return MOS_STATUS_INVALID_FILE_SIZE;
1984     }
1985 
1986     if (m_sseEnabled)
1987     {
1988         CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
1989     }
1990 
1991     CODECHAL_ENCODE_CHK_COND_RETURN(totalCU == 0, "Invalid totalCU count");
1992     encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1993         (uint8_t)((sumQp / (double)totalCU) / 4.0);  // due to TU is 4x4 and there are 4 TUs in one CU
1994 
1995     if (m_enableTileStitchByHW)
1996     {
1997         return eStatus;
1998     }
1999 
2000     uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
2001     tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
2002     CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
2003 
2004     CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
2005     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2006     lockFlags.ReadOnly = 1;
2007     uint8_t *bitstream = (uint8_t *)m_osInterface->pfnLockResource(
2008         m_osInterface,
2009         &currRefList.resBitstreamBuffer,
2010         &lockFlags);
2011     if (bitstream == nullptr)
2012     {
2013         MOS_SafeFreeMemory(tempBsBuffer);
2014         CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
2015     }
2016 
2017     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2018     {
2019         uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
2020         uint32_t len    = tileStatusReport[i].Length;
2021 
2022         MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
2023         bufPtr += len;
2024     }
2025 
2026     MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
2027     MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
2028         m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
2029 
2030     if (tempBsBuffer)
2031     {
2032         MOS_FreeMemory(tempBsBuffer);
2033     }
2034 
2035     if (m_osInterface && bitstream)
2036     {
2037         m_osInterface->pfnUnlockResource(m_osInterface, &currRefList.resBitstreamBuffer);
2038     }
2039 
2040     if (m_osInterface && tileStatusReport)
2041     {
2042         // clean-up the tile status report buffer
2043         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
2044 
2045         m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
2046     }
2047 
2048     return eStatus;
2049 }
2050 
AllocateResourcesVariableSize()2051 MOS_STATUS CodechalEncHevcStateG12::AllocateResourcesVariableSize()
2052 {
2053     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2054 
2055     CODECHAL_ENCODE_FUNCTION_ENTER;
2056 
2057     if (!m_hevcPicParams->tiles_enabled_flag)
2058     {
2059         return eStatus;
2060     }
2061 
2062     uint32_t bufSize = 0;
2063     if (m_pakPiplStrmOutEnable)
2064     {
2065         // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
2066         // One CU has 16-byte. But, each tile needs to be aliged to the cache line
2067         uint32_t tileWidthInCus  = 0;
2068         uint32_t tileHeightInCus = 0;
2069         uint32_t numTileColumns  = m_hevcPicParams->num_tile_columns_minus1 + 1;
2070         uint32_t numTileRows     = m_hevcPicParams->num_tile_rows_minus1 + 1;
2071         for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2072         {
2073             for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2074             {
2075                 uint32_t idx = tileRow * numTileColumns + tileCol;
2076 
2077                 tileHeightInCus = m_tileParams[idx].TileHeightInMinCbMinus1 + 1;
2078                 tileWidthInCus  = m_tileParams[idx].TileWidthInMinCbMinus1 + 1;
2079                 bufSize += (tileWidthInCus * tileHeightInCus * 16);
2080                 bufSize = MOS_ALIGN_CEIL(bufSize, CODECHAL_CACHELINE_SIZE);
2081             }
2082         }
2083         if (Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ||
2084             (bufSize > m_resPakcuLevelStreamoutData.dwSize))
2085         {
2086             if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
2087             {
2088                 m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
2089             }
2090 
2091             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2092             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2093             allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
2094             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2095             allocParamsForBufferLinear.Format   = Format_Buffer;
2096             allocParamsForBufferLinear.dwBytes  = bufSize;
2097             allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
2098 
2099             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
2100                 m_osInterface,
2101                 &allocParamsForBufferLinear,
2102                 &m_resPakcuLevelStreamoutData.sResource));
2103             m_resPakcuLevelStreamoutData.dwSize = bufSize;
2104             CODECHAL_ENCODE_VERBOSEMESSAGE("reallocate cu steam out buffer, size=0x%x.\n", bufSize);
2105         }
2106     }
2107 
2108     return eStatus;
2109 }
2110 
ExecutePictureLevel()2111 MOS_STATUS CodechalEncHevcStateG12::ExecutePictureLevel()
2112 {
2113     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2114 
2115     m_firstTaskInPhase = m_singleTaskPhaseSupported ? IsFirstPass() : true;
2116     m_lastTaskInPhase  = m_singleTaskPhaseSupported ? IsLastPass() : true;
2117 
2118     PerfTagSetting perfTag;
2119     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
2120 
2121     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
2122 
2123     if (!m_singleTaskPhaseSupportedInPak)
2124     {
2125         // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
2126         m_firstTaskInPhase = true;
2127         m_lastTaskInPhase  = true;
2128     }
2129 
2130     if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())
2131     {
2132         CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
2133         eStatus = MOS_STATUS_INVALID_PARAMETER;
2134         return eStatus;
2135     }
2136 
2137     MOS_COMMAND_BUFFER cmdBuffer;
2138     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2139 
2140     if ((!m_singleTaskPhaseSupported) || m_firstTaskInPhase)
2141     {
2142         // Send command buffer header at the beginning (OS dependent)
2143         // frame tracking tag is only added in the last command buffer header
2144         bool bRequestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
2145 
2146         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, bRequestFrameTracking));
2147     }
2148 
2149     // clean-up per VDBOX semaphore memory
2150     int32_t currentPipe = GetCurrentPipe();
2151     if (currentPipe < 0)
2152     {
2153         eStatus = MOS_STATUS_INVALID_PARAMETER;
2154         return eStatus;
2155     }
2156 
2157     if (m_numPipe >= 2 &&
2158         ((m_singleTaskPhaseSupported && IsFirstPass()) ||
2159             !m_singleTaskPhaseSupported))
2160     {
2161         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
2162         //HW Semaphore cmd to make sure all pipes start encode at the same time
2163         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2164         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2165             &m_resPipeStartSemaMem,
2166             &cmdBuffer,
2167             m_numPipe));
2168 
2169         // Program some placeholder cmds to resolve the hazard between BEs sync
2170         MHW_MI_STORE_DATA_PARAMS dataParams;
2171         dataParams.pOsResource      = &m_resDelayMinus;
2172         dataParams.dwResourceOffset = 0;
2173         dataParams.dwValue          = 0xDE1A;
2174         for (uint32_t i = 0; i < m_numDelay; i++)
2175         {
2176             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2177                 &cmdBuffer,
2178                 &dataParams));
2179         }
2180 
2181         //clean HW semaphore memory
2182         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2183 
2184         //Start Watchdog Timer
2185         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
2186 
2187         //To help test media reset, this hw semaphore wait will never be reached.
2188         if (m_enableTestMediaReset)
2189         {
2190             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2191                 &m_resPipeStartSemaMem,
2192                 &cmdBuffer,
2193                 m_numPipe + 2));
2194         }
2195     }
2196 
2197     if (m_brcEnabled && !IsFirstPass())  // Only the regular BRC passes have the conditional batch buffer end
2198     {
2199         // Ensure the previous PAK BRC pass is done, mainly for pipes other than pipe0.
2200         if (m_singleTaskPhaseSupported && m_numPipe >= 2 &&
2201             !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
2202         {
2203             CODECHAL_ENCODE_CHK_STATUS_RETURN(
2204                 SendHWWaitCommand(
2205                     &m_resBrcSemaphoreMem[currentPipe].sResource,
2206                     &cmdBuffer,
2207                     1));
2208         }
2209 
2210         // Insert conditional batch buffer end
2211         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2212         MOS_ZeroMemory(
2213             &miConditionalBatchBufferEndParams,
2214             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2215         uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2216                               sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource       ;
2217 
2218         if (m_hucPakStitchEnabled && m_numPipe >= 2)  //BRC scalability
2219         {
2220             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset & 7) == 0);  // Make sure uint64_t aligned
2221             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwHuCStatusRegOffset);
2222 
2223             miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2224             miConditionalBatchBufferEndParams.dwOffset            = baseOffset + m_encodeStatusBuf.dwHuCStatusMaskOffset;
2225         }
2226         else
2227         {
2228             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset & 7) == 0);  // Make sure uint64_t aligned
2229             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwImageStatusCtrlOffset);
2230 
2231             miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2232             miConditionalBatchBufferEndParams.dwOffset            = baseOffset + m_encodeStatusBuf.dwImageStatusMaskOffset;
2233         }
2234 
2235         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2236             &cmdBuffer,
2237             &miConditionalBatchBufferEndParams));
2238 
2239         auto                             mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2240         MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2241         MHW_MI_COPY_MEM_MEM_PARAMS       miCpyMemMemParams;
2242         if (m_hucPakStitchEnabled && m_numPipe >= 2)
2243         {
2244             // Write back the HCP image control register with HUC PAK Int Kernel output
2245             MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2246             MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2247             miLoadRegMemParams.presStoreBuffer = &m_resBrcDataBuffer;
2248             miLoadRegMemParams.dwOffset        = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
2249             miLoadRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2250             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2251 
2252             if (IsFirstPipe())
2253             {
2254                 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
2255                 miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
2256                 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
2257                 miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
2258                 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2259                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
2260 
2261                 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2262                 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2263                 miStoreRegMemParams.dwOffset        = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2264                 miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2265                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2266             }
2267         }
2268         else
2269         {
2270             // Write back the HCP image control register for RC6 may clean it out
2271             MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2272             MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2273             miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2274             miLoadRegMemParams.dwOffset        = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2275             miLoadRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2276             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2277 
2278             MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2279             miStoreRegMemParams.presStoreBuffer = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
2280             miStoreRegMemParams.dwOffset        = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2281             miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2282             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2283 
2284             MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2285             miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2286             miStoreRegMemParams.dwOffset        = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2287             miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2288             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2289         }
2290     }
2291 
2292     if (IsFirstPipe() && IsFirstPass() && m_osInterface->bTagResourceSync)
2293     {
2294         // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2295         // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2296         // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2297         // as long as Dec/VP/Enc won't depend on this PAK so soon.
2298 
2299         PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2300         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2301             m_osInterface,
2302             globalGpuContextSyncTagBuffer));
2303         CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2304 
2305         MHW_MI_STORE_DATA_PARAMS params;
2306         params.pOsResource      = globalGpuContextSyncTagBuffer;
2307         params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2308         uint32_t value          = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2309         params.dwValue          = (value > 0) ? (value - 1) : 0;
2310         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &params));
2311     }
2312 
2313     if (IsFirstPipe())
2314     {
2315         CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2316     }
2317 
2318     if (m_numPipe >= 2)
2319     {
2320         // clean up hw semaphore for BRC PAK pass sync, used only in single task phase.
2321         if (m_singleTaskPhaseSupported &&
2322             m_brcEnabled &&
2323             !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
2324         {
2325             MHW_MI_STORE_DATA_PARAMS storeDataParams;
2326             MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2327             storeDataParams.pOsResource      = &m_resBrcSemaphoreMem[currentPipe].sResource;
2328             storeDataParams.dwResourceOffset = 0;
2329             storeDataParams.dwValue          = 0;
2330 
2331             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2332                 &cmdBuffer,
2333                 &storeDataParams));
2334         }
2335     }
2336 
2337     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelectCmd(&cmdBuffer));
2338 
2339     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpSurfaceStateCmds(&cmdBuffer));
2340 
2341     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer));
2342 
2343     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2344     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2345     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2346 
2347     MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2348     SetHcpQmStateParams(fqmParams, qmParams);
2349     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2350     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2351 
2352     if (m_brcEnabled)
2353     {
2354         uint32_t picStateCmdOffset;
2355         if (m_hucPakStitchEnabled && m_numPipe >= 2)
2356         {
2357             //for non fist PAK pass, always use the 2nd HCP PIC STATE cmd buffer
2358             picStateCmdOffset = IsFirstPass() ? 0 : 1;
2359         }
2360         else
2361         {
2362             picStateCmdOffset = GetCurrentPass();
2363         }
2364 
2365         MOS_RESOURCE &brcHcpStateWriteBuffer = m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];
2366         if (IsPanicModePass())
2367         {
2368             // BRC kernel supports only 4 BrcImageStates read/write buffers.
2369             // So for panic PAK pass use HCP_PIC_STATE command from previous PAK pass.
2370             picStateCmdOffset -= 1;
2371         }
2372 
2373         MHW_BATCH_BUFFER batchBuffer;
2374         MOS_ZeroMemory(&batchBuffer, sizeof(batchBuffer));
2375         batchBuffer.OsResource   = brcHcpStateWriteBuffer;
2376         batchBuffer.dwOffset     = picStateCmdOffset * BRC_IMG_STATE_SIZE_PER_PASS_G12;
2377         batchBuffer.bSecondLevel = true;
2378 
2379         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2380             &cmdBuffer,
2381             &batchBuffer));
2382     }
2383     else
2384     {
2385         CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPictureStateCmd(&cmdBuffer));
2386     }
2387 
2388     // Send HEVC_VP9_RDOQ_STATE command
2389     if (m_hevcRdoqEnabled)
2390     {
2391         MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2392         SetHcpPicStateParams(picStateParams);
2393 
2394         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2395     }
2396 
2397     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2398     return eStatus;
2399 }
2400 
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState)2401 void CodechalEncHevcStateG12::SetHcpSliceStateCommonParams(
2402     MHW_VDBOX_HEVC_SLICE_STATE &sliceState)
2403 {
2404     CodechalEncHevcState::SetHcpSliceStateCommonParams(sliceState);
2405 
2406     sliceState.RoundingIntra = m_roundingIntraInUse;
2407     sliceState.RoundingInter = m_roundingInterInUse;
2408 
2409     if ((m_hevcSliceParams->slice_type == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
2410         (m_hevcSliceParams->slice_type == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag))
2411     {
2412         sliceState.bWeightedPredInUse = true;
2413     }
2414     else
2415     {
2416         sliceState.bWeightedPredInUse = false;
2417     }
2418 
2419     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).dwNumPipe = m_numPipe;
2420 
2421     sliceState.presDataBuffer = IsPanicModePass() ? &m_skipFrameInfo.m_resMbCodeSkipFrameSurface : &m_resMbCodeSurface;
2422 }
2423 
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,bool lastSliceInTile,uint32_t idx)2424 void CodechalEncHevcStateG12::SetHcpSliceStateParams(
2425     MHW_VDBOX_HEVC_SLICE_STATE &          sliceState,
2426     PCODEC_ENCODER_SLCDATA                slcData,
2427     uint16_t                              slcCount,
2428     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,
2429     bool                                  lastSliceInTile,
2430     uint32_t                              idx)
2431 {
2432     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2433 
2434     sliceState.pEncodeHevcSliceParams                                           = &m_hevcSliceParams[slcCount];
2435     sliceState.dwDataBufferOffset                                               = slcData[slcCount].CmdOffset;
2436     sliceState.dwOffset                                                         = slcData[slcCount].SliceOffset;
2437     sliceState.dwLength                                                         = slcData[slcCount].BitSize;
2438     sliceState.uiSkipEmulationCheckCount                                        = slcData[slcCount].SkipEmulationByteCount;
2439     sliceState.dwSliceIndex                                                     = (uint32_t)slcCount;
2440     sliceState.bLastSlice                                                       = (slcCount == m_numSlices - 1);
2441     sliceState.bLastSliceInTile                                                 = lastSliceInTile;
2442     sliceState.bLastSliceInTileColumn                                           = (bool)lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn;
2443     sliceState.bFirstPass                                                       = IsFirstPass();
2444     sliceState.bLastPass                                                        = IsLastPass();
2445     sliceState.bInsertBeforeSliceHeaders                                        = (slcCount == 0);
2446     sliceState.bSaoLumaFlag                                                     = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_luma_flag : 0;
2447     sliceState.bSaoChromaFlag                                                   = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_chroma_flag : 0;
2448     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).pTileCodingParams = tileCodingParams + idx;
2449     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).dwTileID          = idx;
2450 
2451     sliceState.DeblockingFilterDisable = m_hevcSliceParams[slcCount].slice_deblocking_filter_disable_flag;
2452     sliceState.TcOffsetDiv2            = m_hevcSliceParams[slcCount].tc_offset_div2;
2453     sliceState.BetaOffsetDiv2          = m_hevcSliceParams[slcCount].beta_offset_div2;
2454 
2455     CalcTransformSkipParameters(sliceState.EncodeHevcTransformSkipParams);
2456 }
2457 
SetMfxVideoCopyCmdParams(PMOS_COMMAND_BUFFER cmdBuffer)2458 MOS_STATUS CodechalEncHevcStateG12::SetMfxVideoCopyCmdParams(
2459     PMOS_COMMAND_BUFFER cmdBuffer)
2460 {
2461     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2462 
2463     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface);
2464     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface());
2465     MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface();
2466 
2467     uint32_t index = m_virtualEngineBbIndex;
2468 
2469     MHW_CP_COPY_PARAMS cpCopyParams;
2470     MOS_ZeroMemory(&cpCopyParams, sizeof(cpCopyParams));
2471 
2472     cpCopyParams.size          = m_hwInterface->m_tileRecordSize;
2473     cpCopyParams.presSrc       = &m_tileRecordBuffer[index].sResource;
2474     cpCopyParams.presDst       = &m_resBitstreamBuffer;
2475     cpCopyParams.lengthOfTable = (uint8_t)(m_numTiles);
2476     cpCopyParams.isEncodeInUse = true;
2477     CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->SetCpCopy(m_osInterface, cmdBuffer, &cpCopyParams));
2478 
2479     return eStatus;
2480 }
2481 
ExecuteSliceLevel()2482 MOS_STATUS CodechalEncHevcStateG12::ExecuteSliceLevel()
2483 {
2484     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2485 
2486     CODECHAL_ENCODE_FUNCTION_ENTER;
2487 
2488     CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
2489 
2490     if (m_pakOnlyTest)
2491     {
2492         CODECHAL_ENCODE_CHK_STATUS_RETURN(LoadPakCommandAndCuRecordFromFile());
2493     }
2494 
2495     if (!m_hevcPicParams->tiles_enabled_flag)
2496     {
2497         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::ExecuteSliceLevel());
2498     }
2499     else
2500     {
2501         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
2502     }
2503 
2504     return eStatus;
2505 }
2506 
EncTileLevel()2507 MOS_STATUS CodechalEncHevcStateG12::EncTileLevel()
2508 {
2509     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2510 
2511     CODECHAL_ENCODE_FUNCTION_ENTER;
2512 
2513     int32_t currentPipe = GetCurrentPipe();
2514     int32_t currentPass = GetCurrentPass();
2515 
2516     if (currentPipe < 0 || currentPass < 0)
2517     {
2518         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
2519         return MOS_STATUS_INVALID_PARAMETER;
2520     }
2521 
2522     MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
2523     SetHcpSliceStateCommonParams(sliceState);
2524 
2525     MOS_COMMAND_BUFFER cmdBuffer;
2526     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2527 
2528     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2529     uint32_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
2530 
2531     for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2532     {
2533         for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2534         {
2535             PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2536             uint32_t               slcCount, idx, sliceNumInTile = 0;
2537 
2538             idx = tileRow * numTileColumns + tileCol;
2539 
2540             if ((m_numPipe > 1) && (tileCol != currentPipe))
2541             {
2542                 continue;
2543             }
2544 
2545             // HCP_TILE_CODING commmand
2546             CODECHAL_ENCODE_CHK_STATUS_RETURN(
2547                 static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx]));
2548 
2549             for (slcCount = 0; slcCount < m_numSlices; slcCount++)
2550             {
2551                 bool lastSliceInTile = false, sliceInTile = false;
2552 
2553                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
2554                     &m_tileParams[idx],
2555                     &sliceInTile,
2556                     &lastSliceInTile));
2557 
2558                 if (!sliceInTile)
2559                 {
2560                     continue;
2561                 }
2562 
2563                 if (IsFirstPass())
2564                 {
2565                     uint32_t startLcu = 0;
2566                     for (uint32_t ii = 0; ii < slcCount; ii++)
2567                     {
2568                         startLcu += m_hevcSliceParams[ii].NumLCUsInSlice;
2569                     }
2570                     slcData[slcCount].CmdOffset = startLcu * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t);
2571                 }
2572 
2573                 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx);
2574 
2575                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState));
2576 
2577                 sliceNumInTile++;
2578             }  // end of slice
2579 
2580             if (0 == sliceNumInTile)
2581             {
2582                 // One tile must have at least one slice
2583                 CODECHAL_ENCODE_ASSERT(false);
2584                 eStatus = MOS_STATUS_INVALID_PARAMETER;
2585                 return eStatus;
2586             }
2587         }  // end of row tile
2588     }      // end of column tile
2589 
2590     // Insert end of sequence/stream if set
2591     if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
2592     {
2593         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2594         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2595         pakInsertObjectParams.bLastPicInSeq    = m_lastPicInSeq;
2596         pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2597         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2598     }
2599 
2600     // Send VD_PIPELINE_FLUSH command
2601     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2602     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2603     vdPipelineFlushParams.Flags.bWaitDoneHEVC           = 1;
2604     vdPipelineFlushParams.Flags.bFlushHEVC              = 1;
2605     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2606     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2607 
2608     // Send MI_FLUSH command
2609     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2610     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2611     flushDwParams.bVideoPipelineCacheInvalidate = true;
2612     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2613 
2614     //HW Semaphore cmd to make sure all pipes completion encode
2615     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeCompleteSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2616 
2617     if (IsFirstPipe())
2618     {
2619         // first pipe needs to ensure all other pipes are ready
2620         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2621             &m_resPipeCompleteSemaMem,
2622             &cmdBuffer,
2623             m_numPipe));
2624 
2625         //clean HW semaphore memory
2626         MHW_MI_STORE_DATA_PARAMS storeDataParams;
2627         MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2628         storeDataParams.pOsResource = &m_resPipeCompleteSemaMem;
2629         storeDataParams.dwValue     = 0;
2630         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2631             &cmdBuffer,
2632             &storeDataParams));
2633 
2634         // Use HW stitch commands only in the scalable mode
2635         if (m_numPipe > 1 && m_enableTileStitchByHW)
2636         {
2637             //call PAK Int Kernel in scalability case
2638             if (m_hucPakStitchEnabled)
2639             {
2640                 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
2641 #if 0  // Need to enable this code once Gen12 becomes open source \
2642        // 2nd level BB buffer for stitching cmd                   \
2643        // current location to add cmds in 2nd level batch buffer
2644                 m_HucStitchCmdBatchBuffer.iCurrent = 0;
2645                 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2646                 m_HucStitchCmdBatchBuffer.dwOffset = 0;
2647                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
2648                 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
2649                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
2650 #endif
2651             }
2652             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMfxVideoCopyCmdParams(&cmdBuffer));
2653         }
2654 
2655         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2656 
2657         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2658 
2659         if (m_numPipe <= 1)  // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2660         {
2661             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2662 
2663             // BRC PAK statistics different for each pass
2664             if (m_brcEnabled)
2665             {
2666                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
2667             }
2668         }
2669         else
2670         {  //scalability mode
2671             if (m_brcEnabled)
2672             {
2673                 //MMIO register is not used in scalability BRC case. all information is in TileSizeRecord stream out buffer
2674                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatisticsForScalability(&cmdBuffer));
2675             }
2676             else
2677             {
2678                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2679             }
2680         }
2681 
2682 #if (_DEBUG || _RELEASE_INTERNAL)
2683         //this is to support BRC scalbility test to match with single pipe. Will be removed later after enhanced BRC Scalability is enabled.
2684         if (m_brcEnabled && m_forceSinglePakPass)
2685         {
2686             CODECHAL_ENCODE_CHK_STATUS_RETURN(ResetImgCtrlRegInPAKStatisticsBuffer(&cmdBuffer));
2687         }
2688 #endif
2689 
2690         if (m_singleTaskPhaseSupported &&
2691             m_brcEnabled && m_numPipe >= 2 && !IsLastPass())
2692         {
2693             // Signal HW semaphore for the BRC dependency (i.e., next BRC pass waits for the current BRC pass)
2694             for (auto i = 0; i < m_numPipe; i++)
2695             {
2696                 if (!Mos_ResourceIsNull(&m_resBrcSemaphoreMem[i].sResource))
2697                 {
2698                     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2699                     storeDataParams.pOsResource = &m_resBrcSemaphoreMem[i].sResource;
2700                     storeDataParams.dwValue     = 1;
2701 
2702                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2703                         &cmdBuffer,
2704                         &storeDataParams));
2705                 }
2706             }
2707         }
2708     }
2709 
2710     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2711     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2712 
2713     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2714     {
2715         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2716     }
2717 
2718     std::string pakPassName = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass));
2719     CODECHAL_DEBUG_TOOL(
2720         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
2721             &cmdBuffer,
2722             CODECHAL_NUM_MEDIA_STATES,
2723             pakPassName.data()));)
2724 
2725     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2726 
2727     if (IsFirstPipe() &&
2728         (m_pakOnlyTest == 0) &&  // In the PAK only test, no need to wait for ENC's completion
2729         IsFirstPass() &&
2730         !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2731     {
2732         MOS_SYNC_PARAMS syncParams  = g_cInitSyncParams;
2733         syncParams.GpuContext       = m_videoContext;
2734         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2735 
2736         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2737     }
2738 
2739     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2740     {
2741         bool nullRendering = m_videoContextUsesNullHw;
2742 
2743         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
2744 
2745         CODECHAL_DEBUG_TOOL(
2746             CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2747             CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpPakOutput());
2748             if (m_mmcState) {
2749                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2750             })
2751 
2752         if ((IsLastPipe()) &&
2753             (IsLastPass()) &&
2754             m_signalEnc &&
2755             m_currRefSync &&
2756             !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2757         {
2758             // signal semaphore
2759             MOS_SYNC_PARAMS syncParams;
2760             syncParams                  = g_cInitSyncParams;
2761             syncParams.GpuContext       = m_videoContext;
2762             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2763 
2764             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2765             m_currRefSync->uiSemaphoreObjCount++;
2766             m_currRefSync->bInUsed = true;
2767         }
2768     }
2769 
2770     // Reset parameters for next PAK execution
2771     if (IsLastPipe() && IsLastPass())
2772     {
2773         if (!m_singleTaskPhaseSupported)
2774         {
2775             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2776         }
2777 
2778         m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2779 
2780         if (m_hevcSeqParams->ParallelBRC)
2781         {
2782             m_brcBuffers.uiCurrBrcPakStasIdxForWrite =
2783                 (m_brcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2784         }
2785 
2786         m_newPpsHeader = 0;
2787         m_newSeqHeader = 0;
2788         m_frameNum++;
2789     }
2790 
2791     return eStatus;
2792 }
2793 
DecideEncodingPipeNumber()2794 MOS_STATUS CodechalEncHevcStateG12::DecideEncodingPipeNumber()
2795 {
2796     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2797 
2798     CODECHAL_ENCODE_FUNCTION_ENTER;
2799 
2800     m_numPipe = m_numVdbox;
2801 
2802     uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2803 
2804     if (numTileColumns > m_numPipe)
2805     {
2806         m_numPipe = 1;
2807     }
2808 
2809     if (numTileColumns < m_numPipe)
2810     {
2811         if (numTileColumns >= 1 && numTileColumns <= 4)
2812         {
2813             m_numPipe = numTileColumns;
2814         }
2815         else
2816         {
2817             m_numPipe = 1;  // invalid tile column test cases and switch back to the single VDBOX mode
2818         }
2819     }
2820 
2821     m_useVirtualEngine = true;  //always use virtual engine interface for single pipe and scalability mode
2822 
2823     if (!m_forceScalability)
2824     {
2825         //resolution < 4K, always go with single pipe
2826         if (m_frameWidth * m_frameHeight < ENCODE_HEVC_4K_PIC_WIDTH * ENCODE_HEVC_4K_PIC_HEIGHT)
2827         {
2828             m_numPipe = 1;
2829         }
2830     }
2831 
2832     m_numUsedVdbox       = m_numPipe;
2833     m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
2834 
2835     if (m_scalabilityState)
2836     {
2837         // Create/ re-use a GPU context with 2 pipes
2838         m_scalabilityState->ucScalablePipeNum = m_numPipe;
2839     }
2840     return eStatus;
2841 }
2842 
PlatformCapabilityCheck()2843 MOS_STATUS CodechalEncHevcStateG12::PlatformCapabilityCheck()
2844 {
2845     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2846 
2847     CODECHAL_ENCODE_FUNCTION_ENTER;
2848 
2849     CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
2850 
2851     if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2852     {
2853         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState, (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2854     }
2855 
2856     if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_16K_PIC_WIDTH * ENCODE_HEVC_MAX_16K_PIC_HEIGHT)
2857     {
2858         eStatus = MOS_STATUS_INVALID_PARAMETER;
2859         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 16k not supported");
2860     }
2861 
2862     if (m_vdencEnabled && m_chromaFormat == HCP_CHROMA_FORMAT_YUV444 && m_hevcSeqParams->TargetUsage == 7)
2863     {
2864         CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n");
2865         m_hevcSeqParams->TargetUsage = 4;
2866     }
2867 
2868     if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
2869         (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat &&
2870         Format_YUY2 == m_reconSurface.Format)
2871     {
2872         if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
2873             m_reconSurface.dwWidth < m_oriFrameWidth / 2)
2874         {
2875             return MOS_STATUS_INVALID_PARAMETER;
2876         }
2877     }
2878 
2879     // set RDOQ Intra blocks Threshold for Gen11+
2880     m_rdoqIntraTuThreshold = 0;
2881     if (m_hevcRdoqEnabled)
2882     {
2883         if (1 == m_hevcSeqParams->TargetUsage)
2884         {
2885             m_rdoqIntraTuThreshold = 0xffff;
2886         }
2887         else if (4 == m_hevcSeqParams->TargetUsage)
2888         {
2889             m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
2890             m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
2891         }
2892     }
2893 
2894     return eStatus;
2895 }
2896 
CheckSupportedFormat(PMOS_SURFACE surface)2897 bool CodechalEncHevcStateG12::CheckSupportedFormat(PMOS_SURFACE surface)
2898 {
2899     CODECHAL_ENCODE_FUNCTION_ENTER;
2900 
2901     bool isColorFormatSupported = false;
2902 
2903     if (nullptr == surface)
2904     {
2905         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
2906         return isColorFormatSupported;
2907     }
2908 
2909     switch (surface->Format)
2910     {
2911     case Format_NV12:
2912         isColorFormatSupported = IS_Y_MAJOR_TILE_FORMAT(surface->TileType);
2913         break;
2914     case Format_YUY2:
2915     case Format_YUYV:
2916     case Format_A8R8G8B8:
2917     case Format_P010:
2918     case Format_P016:
2919     case Format_Y210:
2920     case Format_Y216:
2921         break;
2922     default:
2923         CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
2924         break;
2925     }
2926 
2927     return isColorFormatSupported;
2928 }
2929 
GetSystemPipeNumberCommon()2930 MOS_STATUS CodechalEncHevcStateG12::GetSystemPipeNumberCommon()
2931 {
2932     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2933 
2934     CODECHAL_ENCODE_FUNCTION_ENTER;
2935 
2936     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
2937     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2938 
2939     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
2940     statusKey            = MOS_UserFeature_ReadValue_ID(
2941         nullptr,
2942         __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
2943         &userFeatureData,
2944         m_osInterface->pOsContext);
2945 
2946     bool disableScalability = true; // m_hwInterface->IsDisableScalability()
2947     if (statusKey == MOS_STATUS_SUCCESS)
2948     {
2949         disableScalability = userFeatureData.i32Data ? true : false;
2950     }
2951 
2952     MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
2953     CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
2954 
2955     if (gtSystemInfo && disableScalability == false)
2956     {
2957         // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
2958         m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
2959     }
2960     else
2961     {
2962         m_numVdbox = 1;
2963     }
2964 
2965     return eStatus;
2966 }
2967 
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)2968 MOS_STATUS CodechalEncHevcStateG12::HucPakIntegrate(
2969     PMOS_COMMAND_BUFFER cmdBuffer)
2970 {
2971     CODECHAL_ENCODE_FUNCTION_ENTER;
2972 
2973     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2974 
2975     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2976 
2977     CODECHAL_ENCODE_CHK_COND_RETURN(
2978         (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
2979         "ERROR - vdbox index exceed the maximum");
2980 
2981     auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
2982 
2983     // load kernel from WOPCM into L2 storage RAM
2984     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
2985     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
2986     imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
2987 
2988     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
2989 
2990     // pipe mode select
2991     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2992     pipeModeSelectParams.Mode = m_mode;
2993     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2994 
2995     // DMEM set
2996     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
2997     if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
2998     {
2999         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
3000     }
3001     else
3002     {
3003         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateCqp(&dmemParams));
3004     }
3005     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
3006 
3007     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3008     if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
3009     {
3010         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
3011     }
3012     else
3013     {
3014         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateCqp(&virtualAddrParams));
3015     }
3016     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
3017 
3018     // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
3019     MHW_MI_STORE_DATA_PARAMS storeDataParams;
3020     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3021     storeDataParams.pOsResource      = &m_resHucStatus2Buffer;
3022     storeDataParams.dwResourceOffset = 0;
3023     storeDataParams.dwValue          = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
3024     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
3025 
3026     // Store HUC_STATUS2 register
3027     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
3028     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3029     storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
3030     storeRegParams.dwOffset        = sizeof(uint32_t);
3031     storeRegParams.dwRegister      = mmioRegisters->hucStatus2RegOffset;
3032     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
3033 
3034     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
3035 
3036     // wait Huc completion (use HEVC bit for now)
3037     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3038     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3039     vdPipeFlushParams.Flags.bFlushHEVC    = 1;
3040     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3041     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
3042 
3043     // Flush the engine to ensure memory written out
3044     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3045     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3046     flushDwParams.bVideoPipelineCacheInvalidate = true;
3047     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
3048 
3049     EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
3050 
3051     uint32_t baseOffset =
3052         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
3053 
3054     // Write HUC_STATUS mask
3055     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3056     storeDataParams.pOsResource      = &encodeStatusBuf.resStatusBuffer;
3057     storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
3058     storeDataParams.dwValue          = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
3059     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3060         cmdBuffer,
3061         &storeDataParams));
3062 
3063     // store HUC_STATUS register
3064     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3065     storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
3066     storeRegParams.dwOffset        = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
3067     storeRegParams.dwRegister      = mmioRegisters->hucStatusRegOffset;
3068     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
3069         cmdBuffer,
3070         &storeRegParams));
3071 
3072     return eStatus;
3073 }
3074 
Initialize(CodechalSetting * settings)3075 MOS_STATUS CodechalEncHevcStateG12::Initialize(CodechalSetting *settings)
3076 {
3077     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3078 
3079     CODECHAL_ENCODE_FUNCTION_ENTER;
3080 
3081     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
3082     MOS_STATUS                  statusKey = MOS_STATUS_SUCCESS;
3083 
3084 #if (_DEBUG || _RELEASE_INTERNAL)
3085     char stringData[MOS_USER_CONTROL_MAX_DATA_SIZE];
3086     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3087     userFeatureData.StringData.pStringData = stringData;
3088     statusKey                              = MOS_UserFeature_ReadValue_ID(
3089         nullptr,
3090         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID,
3091         &userFeatureData,
3092         m_osInterface->pOsContext);
3093 
3094     if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
3095     {
3096         MOS_SecureStrcpy(m_pakOnlyDataFolder,
3097             sizeof(m_pakOnlyDataFolder) / sizeof(m_pakOnlyDataFolder[0]),
3098             stringData);
3099 
3100         uint32_t len = strlen(m_pakOnlyDataFolder);
3101         if (m_pakOnlyDataFolder[len - 1] == '\\')
3102         {
3103             m_pakOnlyDataFolder[len - 1] = 0;
3104         }
3105 
3106         m_pakOnlyTest = true;
3107         // PAK only mode does not need to init any kernel
3108     }
3109 
3110     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3111     userFeatureData.StringData.pStringData = stringData;
3112     statusKey                              = MOS_UserFeature_ReadValue_ID(
3113         nullptr,
3114         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_LOAD_KERNEL_INPUT_ID,
3115         &userFeatureData,
3116         m_osInterface->pOsContext);
3117 
3118     if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
3119     {
3120         MOS_SecureStrcpy(m_loadKernelInputDataFolder,
3121             sizeof(m_loadKernelInputDataFolder) / sizeof(m_loadKernelInputDataFolder[0]),
3122             stringData);
3123 
3124         uint32_t len = strlen(m_loadKernelInputDataFolder);
3125         if (m_loadKernelInputDataFolder[len - 1] == '\\')
3126         {
3127             m_loadKernelInputDataFolder[len - 1] = 0;
3128         }
3129         m_loadKernelInput = true;
3130     }
3131 #endif
3132 
3133     // Common initialization
3134     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
3135 
3136     m_numDelay                              = 15;  //Value suggested by HW team.
3137     m_bmeMethodTable                        = (uint8_t *)m_meMethod;
3138     m_b4XMeDistortionBufferSupported        = true;
3139     m_brcBuffers.dwBrcConstantSurfaceWidth  = HEVC_BRC_CONSTANT_SURFACE_WIDTH_G9;
3140     m_brcBuffers.dwBrcConstantSurfaceHeight = HEVC_BRC_CONSTANT_SURFACE_HEIGHT_G10;
3141     m_brcHistoryBufferSize                  = HEVC_BRC_HISTORY_BUFFER_SIZE_G12;
3142     m_maxNumSlicesSupported                 = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6;
3143     m_brcBuffers.dwBrcHcpPicStateSize       = BRC_IMG_STATE_SIZE_PER_PASS_G12 * CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES;
3144 
3145     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3146     MOS_UserFeature_ReadValue_ID(
3147         nullptr,
3148         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
3149         &userFeatureData,
3150         m_osInterface->pOsContext);
3151     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
3152 
3153     // Max ConcurrentGroup used in the ENC kernel
3154     m_numberConcurrentGroup = 4;
3155 
3156     m_sizeOfHcpPakFrameStats = 9 * CODECHAL_CACHELINE_SIZE;  //Frame statistics occupying 9 caceline on gen12
3157 
3158     // Max Subthread number used in the ENC kernel
3159     m_numberEncKernelSubThread = 3;
3160 
3161     if (m_numberEncKernelSubThread > m_hevcThreadTaskDataNum)
3162     {
3163         m_numberEncKernelSubThread = m_hevcThreadTaskDataNum;  // support up to 2 sub-threads in one LCU64x64
3164     }
3165 
3166     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3167     MOS_UserFeature_ReadValue_ID(
3168         nullptr,
3169         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
3170         &userFeatureData,
3171         m_osInterface->pOsContext);
3172     m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
3173 
3174     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3175     MOS_UserFeature_ReadValue_ID(
3176         nullptr,
3177         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
3178         &userFeatureData,
3179         m_osInterface->pOsContext);
3180     m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
3181 
3182     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3183     MOS_UserFeature_ReadValue_ID(
3184         nullptr,
3185         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_ENCODE_SSE_ENABLE_ID,
3186         &userFeatureData,
3187         m_osInterface->pOsContext);
3188     m_sseSupported = userFeatureData.i32Data ? true : false;
3189 
3190     // Overriding the defaults here with 32 aligned dimensions
3191     // 2x Scaling WxH
3192     m_downscaledWidth2x =
3193         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth);
3194     m_downscaledHeight2x =
3195         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight);
3196 
3197     // HME Scaling WxH
3198     m_downscaledWidth4x =
3199         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth);
3200     m_downscaledHeight4x =
3201         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight);
3202     m_downscaledWidthInMb4x =
3203         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
3204     m_downscaledHeightInMb4x =
3205         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x);
3206 
3207     // SuperHME Scaling WxH
3208     m_downscaledWidth16x =
3209         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x);
3210     m_downscaledHeight16x =
3211         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x);
3212     m_downscaledWidthInMb16x =
3213         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
3214     m_downscaledHeightInMb16x =
3215         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x);
3216 
3217     // UltraHME Scaling WxH
3218     m_downscaledWidth32x =
3219         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x);
3220     m_downscaledHeight32x =
3221         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x);
3222     m_downscaledWidthInMb32x =
3223         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
3224     m_downscaledHeightInMb32x =
3225         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x);
3226 
3227     // disable MMCD if we enable Codechal dump. Because dump code changes the surface state from compressed to uncompressed,
3228     // this causes mis-match issue between dump is enabled or disabled.
3229     CODECHAL_DEBUG_TOOL(
3230         if (m_mmcState && m_debugInterface && m_debugInterface->m_dbgCfgHead){
3231             //m_mmcState->SetMmcDisabled();
3232         })
3233 
3234     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
3235 
3236     if (MOS_VE_SUPPORTED(m_osInterface))
3237     {
3238         m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
3239         CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
3240         //scalability initialize
3241         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
3242     }
3243 
3244     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3245     statusKey = MOS_UserFeature_ReadValue_ID(
3246         nullptr,
3247         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
3248         &userFeatureData,
3249         m_osInterface->pOsContext);
3250     m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
3251 
3252     statusKey = MOS_STATUS_SUCCESS;
3253     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3254     statusKey = MOS_UserFeature_ReadValue_ID(
3255         nullptr,
3256         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
3257         &userFeatureData,
3258         m_osInterface->pOsContext);
3259     m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
3260 
3261     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3262     statusKey = MOS_UserFeature_ReadValue_ID(
3263         nullptr,
3264         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_WP_SUPPORT_ID,
3265         &userFeatureData,
3266         m_osInterface->pOsContext);
3267     m_weightedPredictionSupported = userFeatureData.i32Data ? true : false;
3268 
3269 #if (_DEBUG || _RELEASE_INTERNAL)
3270     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3271     statusKey = MOS_UserFeature_ReadValue_ID(
3272         nullptr,
3273         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
3274         &userFeatureData,
3275         m_osInterface->pOsContext);
3276     m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
3277 
3278     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3279     MOS_UserFeature_ReadValue_ID(
3280         nullptr,
3281         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_FORCE_SCALABILITY_ID,
3282         &userFeatureData,
3283         m_osInterface->pOsContext);
3284     m_forceScalability = userFeatureData.i32Data ? true : false;
3285 
3286     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3287     statusKey = MOS_UserFeature_ReadValue_ID(
3288         nullptr,
3289         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_DISABLE_PANIC_MODE_ID,
3290         &userFeatureData,
3291         m_osInterface->pOsContext);
3292     if (statusKey == MOS_STATUS_SUCCESS)
3293     {
3294         m_enableFramePanicMode = userFeatureData.i32Data ? false : true;
3295     }
3296 
3297     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3298     MOS_UserFeature_ReadValue_ID(
3299         nullptr,
3300         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_INTERVAL_ID,
3301         &userFeatureData,
3302         m_osInterface->pOsContext);
3303     m_ltrInterval = (uint32_t)(userFeatureData.i32Data);
3304 
3305     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3306     MOS_UserFeature_ReadValue_ID(
3307         nullptr,
3308         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_DISABLE_ID,
3309         &userFeatureData,
3310         m_osInterface->pOsContext);
3311     m_enableBrcLTR = (userFeatureData.i32Data) ? false : true;
3312 #endif
3313 
3314     if (m_codecFunction != CODECHAL_FUNCTION_PAK)
3315     {
3316         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3317         MOS_UserFeature_ReadValue_ID(
3318             nullptr,
3319             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
3320             &userFeatureData,
3321             m_osInterface->pOsContext);
3322         m_hmeSupported = (userFeatureData.i32Data) ? true : false;
3323 
3324         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3325         MOS_UserFeature_ReadValue_ID(
3326             nullptr,
3327             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
3328             &userFeatureData,
3329             m_osInterface->pOsContext);
3330         m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3331 
3332         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3333         MOS_UserFeature_ReadValue_ID(
3334             nullptr,
3335             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID,
3336             &userFeatureData,
3337             m_osInterface->pOsContext);
3338         // Keeping UHME by Default ON for Gen12
3339         m_32xMeSupported = (userFeatureData.i32Data) ? false : true;
3340 
3341         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3342         MOS_UserFeature_ReadValue_ID(
3343             nullptr,
3344             __MEDIA_USER_FEATURE_VALUE_HEVC_NUM_THREADS_PER_LCU_ID,
3345             &userFeatureData,
3346             m_osInterface->pOsContext);
3347         m_totalNumThreadsPerLcu = (uint16_t)userFeatureData.i32Data;
3348 
3349         if (m_totalNumThreadsPerLcu < m_minThreadsPerLcuB || m_totalNumThreadsPerLcu > m_maxThreadsPerLcuB)
3350         {
3351             return MOS_STATUS_INVALID_PARAMETER;
3352         }
3353     }
3354 
3355     if (m_frameWidth < 128 || m_frameHeight < 128)
3356     {
3357         m_16xMeSupported = false;
3358         m_32xMeSupported = false;
3359     }
3360     else if (m_frameWidth < 512 || m_frameHeight < 512)
3361     {
3362         m_32xMeSupported = false;
3363     }
3364 
3365     return eStatus;
3366 }
3367 
LoadCosts(uint8_t sliceType,uint8_t qp)3368 void CodechalEncHevcStateG12::LoadCosts(uint8_t sliceType, uint8_t qp)
3369 {
3370     if (sliceType >= CODECHAL_HEVC_NUM_SLICE_TYPES)
3371     {
3372         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid slice type");
3373         sliceType = CODECHAL_HEVC_I_SLICE;
3374     }
3375 
3376     double  qpScale   = 0.60;
3377     int32_t qpMinus12 = qp - 12;
3378     double  lambda    = sqrt(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0));
3379     uint8_t lcuIdx    = ((m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3) == 6) ? 1 : 0;
3380     m_lambdaRD        = (uint16_t)(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0) * 4 + 0.5);
3381 
3382     m_modeCostCre[LUTCREMODE_INTRA_32X32]       = CRECOST(lambda, LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3383     m_modeCostCre[LUTCREMODE_INTRA_16X16]       = CRECOST(lambda, LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3384     m_modeCostCre[LUTCREMODE_INTRA_8X8]         = CRECOST(lambda, LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3385     m_modeCostCre[LUTCREMODE_INTRA_CHROMA]      = CRECOST(lambda, LUTMODEBITS_INTRA_CHROMA, lcuIdx, sliceType);
3386     m_modeCostCre[LUTCREMODE_INTER_32X32]       = CRECOST(lambda, LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3387     m_modeCostCre[LUTCREMODE_INTER_32X16]       = CRECOST(lambda, LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3388     m_modeCostCre[LUTCREMODE_INTER_16X16]       = CRECOST(lambda, LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3389     m_modeCostCre[LUTCREMODE_INTER_16X8]        = CRECOST(lambda, LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3390     m_modeCostCre[LUTCREMODE_INTER_8X8]         = CRECOST(lambda, LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3391     m_modeCostCre[LUTCREMODE_INTER_BIDIR]       = CRECOST(lambda, LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3392     m_modeCostCre[LUTCREMODE_INTER_SKIP]        = CRECOST(lambda, LUTMODEBITS_INTER_SKIP, lcuIdx, sliceType);
3393     m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3394     m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_16X16, lcuIdx, sliceType);
3395     m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8]   = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3396     m_modeCostCre[LUTCREMODE_INTRA_NONPRED]     = CRECOST(lambda, LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3397 
3398     m_modeCostRde[LUTRDEMODE_INTRA_64X64]       = RDEBITS62(LUTMODEBITS_INTRA_64X64, lcuIdx, sliceType);
3399     m_modeCostRde[LUTRDEMODE_INTRA_32X32]       = RDEBITS62(LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3400     m_modeCostRde[LUTRDEMODE_INTRA_16X16]       = RDEBITS62(LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3401     m_modeCostRde[LUTRDEMODE_INTRA_8X8]         = RDEBITS62(LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3402     m_modeCostRde[LUTRDEMODE_INTRA_NXN]         = RDEBITS62(LUTMODEBITS_INTRA_NXN, lcuIdx, sliceType);
3403     m_modeCostRde[LUTRDEMODE_INTRA_MPM]         = RDEBITS62(LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3404     m_modeCostRde[LUTRDEMODE_INTRA_DC_32X32]    = RDEBITS62(LUTMODEBITS_INTRA_DC_32X32, lcuIdx, sliceType);
3405     m_modeCostRde[LUTRDEMODE_INTRA_DC_8X8]      = RDEBITS62(LUTMODEBITS_INTRA_DC_8X8, lcuIdx, sliceType);
3406     m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3407     m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8]   = RDEBITS62(LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3408     m_modeCostRde[LUTRDEMODE_INTER_BIDIR]       = RDEBITS62(LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3409     m_modeCostRde[LUTRDEMODE_INTER_REFID]       = RDEBITS62(LUTMODEBITS_INTER_REFID, lcuIdx, sliceType);
3410     m_modeCostRde[LUTRDEMODE_SKIP_64X64]        = RDEBITS62(LUTMODEBITS_SKIP_64X64, lcuIdx, sliceType);
3411     m_modeCostRde[LUTRDEMODE_SKIP_32X32]        = RDEBITS62(LUTMODEBITS_SKIP_32X32, lcuIdx, sliceType);
3412     m_modeCostRde[LUTRDEMODE_SKIP_16X16]        = RDEBITS62(LUTMODEBITS_SKIP_16X16, lcuIdx, sliceType);
3413     m_modeCostRde[LUTRDEMODE_SKIP_8X8]          = RDEBITS62(LUTMODEBITS_SKIP_8X8, lcuIdx, sliceType);
3414     m_modeCostRde[LUTRDEMODE_MERGE_64X64]       = RDEBITS62(LUTMODEBITS_MERGE_64X64, lcuIdx, sliceType);
3415     m_modeCostRde[LUTRDEMODE_MERGE_32X32]       = RDEBITS62(LUTMODEBITS_MERGE_32X32, lcuIdx, sliceType);
3416     m_modeCostRde[LUTRDEMODE_MERGE_16X16]       = RDEBITS62(LUTMODEBITS_MERGE_16X16, lcuIdx, sliceType);
3417     m_modeCostRde[LUTRDEMODE_MERGE_8X8]         = RDEBITS62(LUTMODEBITS_MERGE_8X8, lcuIdx, sliceType);
3418     m_modeCostRde[LUTRDEMODE_INTER_32X32]       = RDEBITS62(LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3419     m_modeCostRde[LUTRDEMODE_INTER_32X16]       = RDEBITS62(LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3420     m_modeCostRde[LUTRDEMODE_INTER_16X16]       = RDEBITS62(LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3421     m_modeCostRde[LUTRDEMODE_INTER_16X8]        = RDEBITS62(LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3422     m_modeCostRde[LUTRDEMODE_INTER_8X8]         = RDEBITS62(LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3423     m_modeCostRde[LUTRDEMODE_TU_DEPTH_0]        = RDEBITS62(LUTMODEBITS_TU_DEPTH_0, lcuIdx, sliceType);
3424     m_modeCostRde[LUTRDEMODE_TU_DEPTH_1]        = RDEBITS62(LUTMODEBITS_TU_DEPTH_1, lcuIdx, sliceType);
3425 
3426     for (uint8_t i = 0; i < 8; i++)
3427     {
3428         m_modeCostRde[LUTRDEMODE_CBF + i] = RDEBITS62(LUTMODEBITS_CBF + i, lcuIdx, sliceType);
3429     }
3430 }
3431 
3432 // ------------------------------------------------------------------------------
3433 //| Purpose:    Setup curbe for HEVC MbEnc B Kernels
//| Return:     MOS_STATUS
3435 //------------------------------------------------------------------------------
SetCurbeMbEncBKernel()3436 MOS_STATUS CodechalEncHevcStateG12::SetCurbeMbEncBKernel()
3437 {
3438     uint32_t        curIdx = m_currRecycledBufIdx;
3439     MOS_LOCK_PARAMS lockFlags;
3440     MOS_STATUS      eStatus = MOS_STATUS_SUCCESS;
3441 
3442     uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3;  // Map TU 1,4,6 to 0,1,2
3443 
3444     // Initialize the CURBE data
3445     MBENC_CURBE curbe;
3446 
3447     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3448     {
3449         curbe.QPType    = QP_TYPE_CONSTANT;
3450         curbe.ROIEnable = m_hevcPicParams->NumROI ? true : false;
3451     }
3452     else
3453     {
3454         curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
3455     }
3456 
3457     // TU based settings
3458     curbe.EnableCu64Check        = m_tuSettings[EnableCu64CheckTuParam][tuMapping];
3459     curbe.MaxNumIMESearchCenter  = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];
3460     curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping];
3461     curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping];
3462     curbe.Dynamic64Order         = m_tuSettings[Dynamic64OrderTuParam][tuMapping];
3463     curbe.DynamicOrderTh         = m_tuSettings[DynamicOrderThTuParam][tuMapping];
3464     curbe.Dynamic64Enable        = m_tuSettings[Dynamic64EnableTuParam][tuMapping];
3465     curbe.Dynamic64Th            = m_tuSettings[Dynamic64ThTuParam][tuMapping];
3466     curbe.IncreaseExitThresh     = m_tuSettings[IncreaseExitThreshTuParam][tuMapping];
3467     curbe.IntraSpotCheck         = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping];
3468     curbe.Fake32Enable           = m_tuSettings[Fake32EnableTuParam][tuMapping];
3469 
3470     curbe.FrameWidthInSamples  = m_frameWidth;
3471     curbe.FrameHeightInSamples = m_frameHeight;
3472 
3473     curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3474     curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
3475     curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
3476     curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3477 
3478     curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc;
3479 
3480     curbe.TUDepthControl = curbe.MaxTransformDepthInter;
3481 
3482     int32_t sliceQp   = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3483     curbe.FrameQP     = abs(sliceQp);
3484     curbe.FrameQPSign = (sliceQp > 0) ? 0 : 1;
3485 
3486 #if 0  // no need in the optimized kernel because kernel does the table look-up
3487     LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp);
3488     curbe.DW4_ModeIntra32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_32X32];
3489     curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32];
3490 
3491     curbe.DW5_ModeIntra16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_16X16];
3492     curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16];
3493     curbe.DW5_ModeIntra8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_8X8];
3494     curbe.DW5_ModeIntraNonDC8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8];
3495 
3496     curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED];
3497 
3498     curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA];
3499 
3500     curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM];
3501 
3502     curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0];
3503     curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1];
3504 
3505     curbe.DW14_IntraTU4x4CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4];
3506     curbe.DW14_IntraTU8x8CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8];
3507     curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16];
3508     curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32];
3509     curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD;
3510     curbe.DW17_IntraNonDC8x8Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8];
3511     curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32];
3512 #endif
3513 
3514     curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1;
3515     curbe.NumofRowTile    = m_hevcPicParams->num_tile_rows_minus1 + 1;
3516     curbe.HMEFlag         = m_hmeSupported ? 3 : 0;
3517 
3518     curbe.MaxRefIdxL0  = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3519     curbe.MaxRefIdxL1  = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1;
3520     curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3521 
3522     // Check whether Last Frame is I frame or not
3523     if (m_frameNum == 0 || m_picHeightInMb == I_TYPE || (m_frameNum && m_lastPictureCodingType == I_TYPE))
3524     {
3525         // This is the flag to notify kernel not to use the history buffer
3526         curbe.LastFrameIsIntra = true;
3527     }
3528     else
3529     {
3530         curbe.LastFrameIsIntra = false;
3531     }
3532 
3533     curbe.SliceType             = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
3534     curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
3535     curbe.CollocatedFromL0Flag  = m_hevcSliceParams->collocated_from_l0_flag;
3536     curbe.theSameRefList        = m_sameRefList;
3537     curbe.IsLowDelay            = m_lowDelay;
3538     curbe.MaxNumMergeCand       = m_hevcSliceParams->MaxNumMergeCand;
3539     curbe.NumRefIdxL0           = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
3540     curbe.NumRefIdxL1           = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;
3541 
3542     if (m_hevcSeqParams->TargetUsage == 1)
3543     {
3544         // MaxNumMergeCand C Model uses 4 for TU1,
3545         // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3546         curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4);
3547     }
3548     else
3549     {
3550         // MaxNumMergeCand C Model uses 2 for TU4 and TU7,
3551         // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3552         curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2);
3553     }
3554 
3555     int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = {0}, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = {0};
3556     curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
3557     curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
3558     curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
3559     curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);
3560 
3561     curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
3562     curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
3563     curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
3564     curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);
3565 
3566     curbe.RefFrameWinHeight = m_frameHeight;
3567     curbe.RefFrameWinWidth  = m_frameWidth;
3568 
3569     // Hard coding for now from Gen10HEVC_TU4_default.par
3570     curbe.RoundingInter      = (m_roundingInter + 1) << 4;  // Should be an input from par(slice state)
3571     curbe.RoundingIntra      = (m_roundingIntra + 1) << 4;  // Should be an input from par(slice state)
3572     curbe.RDEQuantRoundValue = (m_roundingInter + 1) << 4;
3573 
3574     uint32_t gopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
3575     uint32_t gopB = m_hevcSeqParams->GopPicSize - 1 - gopP;
3576 
3577     curbe.CostScalingForRA = 1;  // default setting
3578 
3579     // get the min distance between current pic and ref pics
3580     uint32_t minPocDist     = 255;
3581     uint32_t costTableIndex = 0;
3582     if (curbe.CostScalingForRA == 1)
3583     {
3584         for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++)
3585         {
3586             if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist)
3587                 minPocDist = abs(tbRefListL0[ref]);
3588         }
3589         for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++)
3590         {
3591             if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist)
3592                 minPocDist = abs(tbRefListL1[ref]);
3593         }
3594 
3595         if (gopB == 4)
3596         {
3597             if (minPocDist == 1 || minPocDist == 2 || minPocDist == 4)
3598                 costTableIndex = minPocDist;
3599         }
3600         if (gopB == 8)
3601         {
3602             if (minPocDist == 1 || minPocDist == 2 || minPocDist == 4 || minPocDist == 8)
3603                 costTableIndex = minPocDist + 3;
3604         }
3605     }
3606 
3607     curbe.CostTableIndex = costTableIndex;
3608 
3609     // the following fields are needed by the new optimized kernel in v052417
3610     curbe.Log2ParallelMergeLevel  = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
3611     curbe.MaxIntraRdeIter         = 1;
3612     curbe.CornerNeighborPixel     = 0;
3613     curbe.IntraNeighborAvailFlags = 0;
3614     curbe.SubPelMode              = 3;  // qual-pel search
3615     curbe.InterSADMeasure         = 2;  // Haar transform
3616     curbe.IntraSADMeasure         = 2;  // Haar transform
3617     curbe.IntraPrediction         = 0;  // enable 32x32, 16x16, and 8x8 luma intra prediction
3618     curbe.RefIDCostMode           = 1;  // 0: AVC and 1: linear method
3619     curbe.TUBasedCostSetting      = 0;
3620     curbe.ConcurrentGroupNum      = m_numberConcurrentGroup;
3621      curbe.WaveFrontSplitVQFix = ((1 << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)) == 64) ? 1 : 0;
3622     curbe.NumofUnitInWaveFront    = m_numWavefrontInOneRegion;
3623     curbe.LoadBalenceEnable       = 0;  // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe
3624     curbe.ThreadNumber            = MOS_MIN(2, m_numberEncKernelSubThread);
3625     curbe.Pic_init_qp_B           = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3626     curbe.Pic_init_qp_P           = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3627     curbe.Pic_init_qp_I           = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3628     curbe.WaveFrontSplitsEnable   = (m_numberConcurrentGroup == 1) ? false : true;
3629     curbe.SuperHME                = m_16xMeSupported;
3630     curbe.UltraHME                = m_32xMeSupported;
3631     curbe.PerBFrameQPOffset       = 0;
3632 
3633     switch (m_hevcSeqParams->TargetUsage)
3634     {
3635     case 1:
3636         curbe.Degree45          = 0;
3637         curbe.Break12Dependency = 0;
3638         break;
3639     case 4:
3640     default:
3641         curbe.Degree45          = 1;
3642         curbe.Break12Dependency = 1;
3643         break;
3644     }
3645 
3646     curbe.LongTermReferenceFlags_L0 = 0;
3647     for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++)
3648     {
3649         curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3650     }
3651     curbe.LongTermReferenceFlags_L1 = 0;
3652     for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++)
3653     {
3654         curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3655     }
3656 
3657     curbe.Stepping           = 0;
3658     curbe.Cu64SkipCheckOnly  = 0;
3659     curbe.Cu642Nx2NCheckOnly = 0;
3660     curbe.EnableCu64AmpCheck = 1;
3661     curbe.IntraSpeedMode     = 0;  // 35 mode
3662     curbe.DisableIntraNxN    = 0;
3663 
3664     if (m_hwInterface->GetPlatform().usRevId == 0)
3665     {
3666         curbe.Stepping               = 1;
3667         curbe.TUDepthControl         = 1;
3668         curbe.MaxTransformDepthInter = 1;
3669         curbe.MaxTransformDepthIntra = 0;
3670         //buf->curbe.EnableCu64Check       = 1;
3671         curbe.Cu64SkipCheckOnly  = 0;
3672         curbe.Cu642Nx2NCheckOnly = 1;
3673         curbe.EnableCu64AmpCheck = 0;
3674         curbe.IntraSpeedMode     = 0;  // 35 mode
3675         curbe.DisableIntraNxN    = 1;
3676         curbe.MaxNumMergeCand    = 1;
3677     }
3678 
3679     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3680     lockFlags.WriteOnly = 1;
3681     auto buf            = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
3682         m_osInterface,
3683         &m_encBCombinedBuffer1[curIdx].sResource,
3684         &lockFlags);
3685     CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
3686 
3687     if (curbe.Degree45)
3688     {
3689         MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
3690     }
3691     buf->Curbe = curbe;
3692 
3693     m_osInterface->pfnUnlockResource(
3694         m_osInterface,
3695         &m_encBCombinedBuffer1[curIdx].sResource);
3696 
3697     // clean-up the thread dependency buffer in the second combined buffer
3698     if (m_numberEncKernelSubThread > 1)
3699     {
3700         MOS_LOCK_PARAMS lockFlags;
3701 
3702         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3703         lockFlags.WriteOnly = 1;
3704         auto data           = (uint8_t *)m_osInterface->pfnLockResource(
3705             m_osInterface,
3706             &m_encBCombinedBuffer2[curIdx].sResource,
3707             &lockFlags);
3708         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3709 
3710         MOS_ZeroMemory(&data[m_threadTaskBufferOffset], m_threadTaskBufferSize);
3711 
3712         m_osInterface->pfnUnlockResource(
3713             m_osInterface,
3714             &m_encBCombinedBuffer2[curIdx].sResource);
3715     }
3716 
3717     if (m_initEncConstTable)
3718     {
3719         // Initialize the Enc Constant Table surface
3720         MOS_LOCK_PARAMS lockFlags;
3721         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3722         lockFlags.WriteOnly = 1;
3723 
3724         auto data = (uint8_t *)m_osInterface->pfnLockResource(
3725             m_osInterface,
3726             &m_encConstantTableForB.sResource,
3727             &lockFlags);
3728         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3729 
3730         if (m_isMaxLcu64)
3731         {
3732             MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize, (const void *)m_encLcu64ConstantDataLut, sizeof(m_encLcu64ConstantDataLut));
3733         }
3734         else
3735         {
3736             MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize, (const void *)m_encLcu32ConstantDataLut, sizeof(m_encLcu32ConstantDataLut));
3737         }
3738 
3739         m_osInterface->pfnUnlockResource(
3740             m_osInterface,
3741             &m_encConstantTableForB.sResource);
3742         m_initEncConstTable = false;
3743     }
3744 
3745     // binding table index
3746     MBENC_COMBINED_BTI params;
3747     if (m_isMaxLcu64)
3748     {
3749         for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3750         {
3751             params.BTI_LCU64.Combined1DSurIndexMF1[i]           = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3752             params.BTI_LCU64.Combined1DSurIndexMF2[i]           = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3753             params.BTI_LCU64.VMEInterPredictionSurfIndexMF[i]   = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3754             params.BTI_LCU64.SrcSurfIndexMF[i]                  = MBENC_B_FRAME_CURR_Y;
3755             params.BTI_LCU64.SrcReconSurfIndexMF[i]             = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3756             params.BTI_LCU64.CURecordSurfIndexMF[i]             = MBENC_B_FRAME_ENC_CU_RECORD;
3757             params.BTI_LCU64.PAKObjectSurfIndexMF[i]            = MBENC_B_FRAME_PAK_OBJ;
3758             params.BTI_LCU64.CUPacketSurfIndexMF[i]             = MBENC_B_FRAME_PAK_CU_RECORD;
3759             params.BTI_LCU64.SWScoreBoardSurfIndexMF[i]         = MBENC_B_FRAME_SW_SCOREBOARD;
3760             params.BTI_LCU64.QPCU16SurfIndexMF[i]               = MBENC_B_FRAME_CU_QP_DATA;
3761             params.BTI_LCU64.LCULevelDataSurfIndexMF[i]         = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3762             params.BTI_LCU64.TemporalMVSurfIndexMF[i]           = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3763             params.BTI_LCU64.HmeDataSurfIndexMF[i]              = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3764             params.BTI_LCU64.VME2XInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
3765         }
3766         params.BTI_LCU64.DebugSurfIndexMF[0]  = MBENC_B_FRAME_DEBUG_SURFACE;
3767         params.BTI_LCU64.DebugSurfIndexMF[1]  = MBENC_B_FRAME_DEBUG_SURFACE1;
3768         params.BTI_LCU64.DebugSurfIndexMF[2]  = MBENC_B_FRAME_DEBUG_SURFACE2;
3769         params.BTI_LCU64.DebugSurfIndexMF[3]  = MBENC_B_FRAME_DEBUG_SURFACE3;
3770         params.BTI_LCU64.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3771         params.BTI_LCU64.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3772     }
3773     else
3774     {
3775         for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3776         {
3777             params.BTI_LCU32.Combined1DSurIndexMF1[i]         = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3778             params.BTI_LCU32.Combined1DSurIndexMF2[i]         = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3779             params.BTI_LCU32.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3780             params.BTI_LCU32.SrcSurfIndexMF[i]                = MBENC_B_FRAME_CURR_Y;
3781             params.BTI_LCU32.SrcReconSurfIndexMF[i]           = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3782             params.BTI_LCU32.CURecordSurfIndexMF[i]           = MBENC_B_FRAME_ENC_CU_RECORD;
3783             params.BTI_LCU32.PAKObjectSurfIndexMF[i]          = MBENC_B_FRAME_PAK_OBJ;
3784             params.BTI_LCU32.CUPacketSurfIndexMF[i]           = MBENC_B_FRAME_PAK_CU_RECORD;
3785             params.BTI_LCU32.SWScoreBoardSurfIndexMF[i]       = MBENC_B_FRAME_SW_SCOREBOARD;
3786             params.BTI_LCU32.QPCU16SurfIndexMF[i]             = MBENC_B_FRAME_CU_QP_DATA;
3787             params.BTI_LCU32.LCULevelDataSurfIndexMF[i]       = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3788             params.BTI_LCU32.TemporalMVSurfIndexMF[i]         = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3789             params.BTI_LCU32.HmeDataSurfIndexMF[i]            = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3790         }
3791         params.BTI_LCU32.DebugSurfIndexMF[0]  = MBENC_B_FRAME_DEBUG_SURFACE;
3792         params.BTI_LCU32.DebugSurfIndexMF[1]  = MBENC_B_FRAME_DEBUG_SURFACE1;
3793         params.BTI_LCU32.DebugSurfIndexMF[2]  = MBENC_B_FRAME_DEBUG_SURFACE2;
3794         params.BTI_LCU32.DebugSurfIndexMF[3]  = MBENC_B_FRAME_DEBUG_SURFACE3;
3795         params.BTI_LCU32.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3796         params.BTI_LCU32.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3797     }
3798 
3799     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
3800     PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
3801     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3802         &params,
3803         kernelState->dwCurbeOffset,
3804         sizeof(params)));
3805 
3806     return eStatus;
3807 }
3808 
3809 // ------------------------------------------------------------------------------
3810 //| Purpose:    Setup curbe for HEVC BrcInitReset Kernel
3811 //| Return:     N/A
3812 //------------------------------------------------------------------------------
SetCurbeBrcInitReset(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)3813 MOS_STATUS CodechalEncHevcStateG12::SetCurbeBrcInitReset(
3814     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
3815 {
3816     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3817 
3818     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
3819 
3820     if (brcKrnIdx != CODECHAL_HEVC_BRC_INIT && brcKrnIdx != CODECHAL_HEVC_BRC_RESET)
3821     {
3822         CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
3823         return MOS_STATUS_INVALID_PARAMETER;
3824     }
3825 
3826     // Initialize the CURBE data
3827     BRC_INITRESET_CURBE curbe = m_brcInitResetCurbeInit;
3828 
3829     uint32_t profileLevelMaxFrame = GetProfileLevelMaxFrameSize();
3830 
3831     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
3832         m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
3833         m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3834     {
3835         if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
3836         {
3837             CODECHAL_ENCODE_ASSERTMESSAGE("Initial VBV Buffer Fullness is zero\n");
3838             return MOS_STATUS_INVALID_PARAMETER;
3839         }
3840 
3841         if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
3842         {
3843             CODECHAL_ENCODE_ASSERTMESSAGE("VBV buffer size in bits is zero\n");
3844             return MOS_STATUS_INVALID_PARAMETER;
3845         }
3846     }
3847 
3848     curbe.DW0_ProfileLevelMaxFrame = profileLevelMaxFrame;
3849     curbe.DW1_InitBufFull          = m_hevcSeqParams->InitVBVBufferFullnessInBit;
3850     curbe.DW2_BufSize              = m_hevcSeqParams->VBVBufferSizeInBit;
3851     curbe.DW3_TargetBitRate        = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;  //DDI in Kbits
3852     curbe.DW4_MaximumBitRate       = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3853     curbe.DW5_MinimumBitRate       = 0;
3854     curbe.DW6_FrameRateM           = m_hevcSeqParams->FrameRate.Numerator;
3855     curbe.DW7_FrameRateD           = m_hevcSeqParams->FrameRate.Denominator;
3856     curbe.DW8_BRCFlag              = BRCINIT_IGNORE_PICTURE_HEADER_SIZE;  // always ignore the picture header size set in BRC Update curbe;
3857 
3858     if (m_hevcPicParams->NumROI)
3859     {
3860         curbe.DW8_BRCFlag |= BRCINIT_DISABLE_MBBRC;  // BRC ROI need disable MBBRC logic in LcuBrc Kernel
3861     }
3862     else
3863     {
3864         curbe.DW8_BRCFlag |= (m_lcuBrcEnabled) ? 0 : BRCINIT_DISABLE_MBBRC;
3865     }
3866 
3867     curbe.DW8_BRCFlag |= (m_brcEnabled && m_numPipe > 1) ? BRCINIT_USEHUCBRC : 0;
3868     // For non-ICQ, ACQP Buffer always set to 1
3869     curbe.DW25_ACQPBuffer        = 1;
3870     curbe.DW25_SlidingWindowSize = m_slidingWindowSize;
3871 
3872     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
3873     {
3874         curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3875         curbe.DW8_BRCFlag |= BRCINIT_ISCBR;
3876     }
3877     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
3878     {
3879         if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3880         {
3881             curbe.DW4_MaximumBitRate = 2 * curbe.DW3_TargetBitRate;
3882         }
3883         curbe.DW8_BRCFlag |= BRCINIT_ISVBR;
3884     }
3885     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3886     {
3887         curbe.DW8_BRCFlag |= BRCINIT_ISAVBR;
3888         // For AVBR, max bitrate = target bitrate,
3889         curbe.DW3_TargetBitRate  = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;  //DDI in Kbits
3890         curbe.DW4_MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3891     }
3892     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
3893     {
3894         curbe.DW8_BRCFlag |= BRCINIT_ISICQ;
3895         curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3896     }
3897     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
3898     {
3899         curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3900         curbe.DW8_BRCFlag |= BRCINIT_ISVCM;
3901     }
3902     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3903     {
3904         curbe.DW8_BRCFlag = BRCINIT_ISCQP;
3905     }
3906     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR)
3907     {
3908         if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3909         {
3910             curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;  // Use max bit rate for HRD compliance
3911         }
3912         curbe.DW8_BRCFlag = curbe.DW8_BRCFlag | BRCINIT_ISQVBR | BRCINIT_ISVBR;  // We need to make sure that VBR is used for QP determination.
3913         // use ICQQualityFactor to determine the larger Qp for each MB
3914         curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3915     }
3916     curbe.DW9_FrameWidth       = m_oriFrameWidth;
3917     curbe.DW10_FrameHeight     = m_oriFrameHeight;
3918     curbe.DW10_AVBRAccuracy    = m_usAvbrAccuracy;
3919     curbe.DW11_AVBRConvergence = m_usAvbrConvergence;
3920     curbe.DW12_NumberSlice     = m_numSlices;
3921 
3922     /**********************************************************************
3923     In case of non-HB/BPyramid Structure
3924     BRC_Param_A = GopP
3925     BRC_Param_B = GopB
3926     In case of HB/BPyramid GOP Structure
3927     BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
3928     BRC Parameters set as follows as per CModel equation
3929     ***********************************************************************/
3930     // BPyramid GOP
3931     if (m_HierchGopBRCEnabled)
3932     {
3933         curbe.DW8_BRCGopP   = ((m_hevcSeqParams->GopPicSize + m_hevcSeqParams->GopRefDist - 1) / m_hevcSeqParams->GopRefDist);
3934         curbe.DW9_BRCGopB   = curbe.DW8_BRCGopP;
3935         curbe.DW13_BRCGopB1 = curbe.DW8_BRCGopP * 2;
3936         curbe.DW14_BRCGopB2 = ((m_hevcSeqParams->GopPicSize) - (curbe.DW8_BRCGopP) - (curbe.DW13_BRCGopB1) - (curbe.DW9_BRCGopB));
3937         // B1 Level GOP
3938         if (m_hevcSeqParams->GopRefDist <= 4 || curbe.DW14_BRCGopB2 == 0)
3939         {
3940             curbe.DW14_MaxBRCLevel = 3;
3941         }
3942         // B2 Level GOP
3943         else
3944         {
3945             curbe.DW14_MaxBRCLevel = 4;
3946         }
3947     }
3948     // For Regular GOP - No BPyramid
3949     else
3950     {
3951         curbe.DW14_MaxBRCLevel = 1;
3952         curbe.DW8_BRCGopP      = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
3953         curbe.DW9_BRCGopB      = m_hevcSeqParams->GopPicSize - 1 - curbe.DW8_BRCGopP;
3954     }
3955 
3956     // Set dynamic thresholds
3957     double inputBitsPerFrame = (double)((double)curbe.DW4_MaximumBitRate * (double)curbe.DW7_FrameRateD);
3958     inputBitsPerFrame        = (double)(inputBitsPerFrame / curbe.DW6_FrameRateM);
3959 
3960     if (curbe.DW2_BufSize < (uint32_t)inputBitsPerFrame * 4)
3961     {
3962         curbe.DW2_BufSize = (uint32_t)inputBitsPerFrame * 4;
3963     }
3964 
3965     if (curbe.DW1_InitBufFull == 0)
3966     {
3967         curbe.DW1_InitBufFull = 7 * curbe.DW2_BufSize / 8;
3968     }
3969     if (curbe.DW1_InitBufFull < (uint32_t)(inputBitsPerFrame * 2))
3970     {
3971         curbe.DW1_InitBufFull = (uint32_t)(inputBitsPerFrame * 2);
3972     }
3973     if (curbe.DW1_InitBufFull > curbe.DW2_BufSize)
3974     {
3975         curbe.DW1_InitBufFull = curbe.DW2_BufSize;
3976     }
3977 
3978     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3979     {
3980         // For AVBR, Buffer size =  2*Bitrate, InitVBV = 0.75 * BufferSize
3981         curbe.DW2_BufSize     = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3982         curbe.DW1_InitBufFull = (uint32_t)(0.75 * curbe.DW2_BufSize);
3983     }
3984 
3985     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3986     {
3987         curbe.DW15_LongTermInterval = 0;  // no LTR for low delay brc
3988     }
3989     else
3990     {
3991         curbe.DW15_LongTermInterval = (m_enableBrcLTR && m_ltrInterval) ? m_ltrInterval : m_enableBrcLTR ? HEVC_BRC_LONG_TERM_REFRENCE_FLAG : 0;
3992     }
3993 
3994     double bpsRatio = ((double)inputBitsPerFrame / ((double)(curbe.DW2_BufSize) / 30));
3995     bpsRatio        = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;
3996 
3997     curbe.DW19_DeviationThreshold0_PBframe = (uint32_t)(-50 * pow(0.90, bpsRatio));
3998     curbe.DW19_DeviationThreshold1_PBframe = (uint32_t)(-50 * pow(0.66, bpsRatio));
3999     curbe.DW19_DeviationThreshold2_PBframe = (uint32_t)(-50 * pow(0.46, bpsRatio));
4000     curbe.DW19_DeviationThreshold3_PBframe = (uint32_t)(-50 * pow(0.3, bpsRatio));
4001 
4002     curbe.DW20_DeviationThreshold4_PBframe = (uint32_t)(50 * pow(0.3, bpsRatio));
4003     curbe.DW20_DeviationThreshold5_PBframe = (uint32_t)(50 * pow(0.46, bpsRatio));
4004     curbe.DW20_DeviationThreshold6_PBframe = (uint32_t)(50 * pow(0.7, bpsRatio));
4005     curbe.DW20_DeviationThreshold7_PBframe = (uint32_t)(50 * pow(0.9, bpsRatio));
4006 
4007     curbe.DW21_DeviationThreshold0_VBRcontrol = (uint32_t)(-50 * pow(0.9, bpsRatio));
4008     curbe.DW21_DeviationThreshold1_VBRcontrol = (uint32_t)(-50 * pow(0.7, bpsRatio));
4009     curbe.DW21_DeviationThreshold2_VBRcontrol = (uint32_t)(-50 * pow(0.5, bpsRatio));
4010     curbe.DW21_DeviationThreshold3_VBRcontrol = (uint32_t)(-50 * pow(0.3, bpsRatio));
4011 
4012     curbe.DW22_DeviationThreshold4_VBRcontrol = (uint32_t)(100 * pow(0.4, bpsRatio));
4013     curbe.DW22_DeviationThreshold5_VBRcontrol = (uint32_t)(100 * pow(0.5, bpsRatio));
4014     curbe.DW22_DeviationThreshold6_VBRcontrol = (uint32_t)(100 * pow(0.75, bpsRatio));
4015     curbe.DW22_DeviationThreshold7_VBRcontrol = (uint32_t)(100 * pow(0.9, bpsRatio));
4016 
4017     curbe.DW23_DeviationThreshold0_Iframe = (uint32_t)(-50 * pow(0.8, bpsRatio));
4018     curbe.DW23_DeviationThreshold1_Iframe = (uint32_t)(-50 * pow(0.6, bpsRatio));
4019     curbe.DW23_DeviationThreshold2_Iframe = (uint32_t)(-50 * pow(0.34, bpsRatio));
4020     curbe.DW23_DeviationThreshold3_Iframe = (uint32_t)(-50 * pow(0.2, bpsRatio));
4021 
4022     curbe.DW24_DeviationThreshold4_Iframe = (uint32_t)(50 * pow(0.2, bpsRatio));
4023     curbe.DW24_DeviationThreshold5_Iframe = (uint32_t)(50 * pow(0.4, bpsRatio));
4024     curbe.DW24_DeviationThreshold6_Iframe = (uint32_t)(50 * pow(0.66, bpsRatio));
4025     curbe.DW24_DeviationThreshold7_Iframe = (uint32_t)(50 * pow(0.9, bpsRatio));
4026 
4027     if (m_hevcSeqParams->HierarchicalFlag && !m_hevcSeqParams->LowDelayMode &&
4028         (m_hevcSeqParams->GopRefDist == 4 || m_hevcSeqParams->GopRefDist == 8))
4029     {
4030         curbe.DW26_RandomAccess = true;
4031     }
4032     else
4033     {
4034         curbe.DW26_RandomAccess = false;
4035     }
4036 
4037     if (m_brcInit)
4038     {
4039         m_dBrcInitCurrentTargetBufFullInBits = curbe.DW1_InitBufFull;
4040     }
4041 
4042     m_brcInitResetBufSizeInBits      = curbe.DW2_BufSize;
4043     m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;
4044 
4045     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
4046     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
4047         &curbe,
4048         kernelState->dwCurbeOffset,
4049         sizeof(curbe)));
4050 
4051     return eStatus;
4052 }
4053 
4054 // ------------------------------------------------------------------------------
4055 //| Purpose:    Setup curbe for HEVC BrcUpdate Kernel
4056 //| Return:     N/A
4057 //------------------------------------------------------------------------------
SetCurbeBrcUpdate(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)4058 MOS_STATUS CodechalEncHevcStateG12::SetCurbeBrcUpdate(
4059     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
4060 {
4061     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4062 
4063     if (brcKrnIdx != CODECHAL_HEVC_BRC_FRAME_UPDATE && brcKrnIdx != CODECHAL_HEVC_BRC_LCU_UPDATE)
4064     {
4065         CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not frame update or LCU update\n");
4066         return MOS_STATUS_INVALID_PARAMETER;
4067     }
4068 
4069     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
4070 
4071     // Initialize the CURBE data
4072     BRCUPDATE_CURBE curbe = m_brcUpdateCurbeInit;
4073 
4074     curbe.DW5_TargetSize_Flag = 0;
4075 
4076     if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits)
4077     {
4078         m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits;
4079         curbe.DW5_TargetSize_Flag = 1;
4080     }
4081 
4082     if (m_numSkipFrames)
4083     {
4084         // pass num/size of skipped frames to update BRC
4085         curbe.DW6_NumSkippedFrames     = m_numSkipFrames;
4086         curbe.DW15_SizeOfSkippedFrames = m_sizeSkipFrames;
4087 
4088         // account for skipped frame in calculating CurrentTargetBufFullInBits
4089         m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
4090     }
4091 
4092     curbe.DW0_TargetSize  = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
4093     curbe.DW1_FrameNumber = m_storeData - 1;  // Check if we can remove this (set to 0)
4094 
4095     // BRC PAK statistic buffer from last frame, the encoded size includes header already.
4096     // in BRC Initreset kernel, curbe DW8_BRCFlag will always ignore picture header size, so no need to set picture header size here.
4097     curbe.DW2_PictureHeaderSize = 0;
4098     curbe.DW5_CurrFrameBrcLevel = m_currFrameBrcLevel;
4099     curbe.DW5_MaxNumPAKs        = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses();
4100 
4101     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
4102     {
4103         curbe.DW6_CqpValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4104     }
4105     if (m_hevcPicParams->NumROI)
4106     {
4107         curbe.DW6_ROIEnable    = m_brcEnabled ? false : true;
4108         curbe.DW6_BRCROIEnable = m_brcEnabled ? true : false;
4109         curbe.DW6_RoiRatio     = CalculateROIRatio();
4110     }
4111     curbe.DW6_SlidingWindowEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
4112 
4113     //for low delay brc
4114     curbe.DW6_LowDelayEnable    = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW);
4115     curbe.DW16_UserMaxFrameSize = GetProfileLevelMaxFrameSize();
4116     curbe.DW14_ParallelMode     = m_hevcSeqParams->ParallelBRC;
4117 
4118     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
4119     {
4120         curbe.DW3_StartGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
4121         curbe.DW3_StartGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
4122         curbe.DW4_StartGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
4123         curbe.DW4_StartGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);
4124 
4125         curbe.DW11_gRateRatioThreshold0 =
4126             (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40)));
4127         curbe.DW11_gRateRatioThreshold1 =
4128             (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75)));
4129         curbe.DW12_gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97)));
4130         curbe.DW12_gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100)));
4131         curbe.DW12_gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100)));
4132         curbe.DW12_gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100)));
4133     }
4134 
4135     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
4136     {
4137         curbe.DW17_LongTerm_Current = 0;  // no LTR for low delay brc
4138     }
4139     else
4140     {
4141         m_isFrameLTR                = (CodecHal_PictureIsLongTermRef(m_currReconstructedPic));
4142         curbe.DW17_LongTerm_Current = (m_enableBrcLTR && m_isFrameLTR) ? 1 : 0;
4143     }
4144 
4145     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
4146     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
4147         &curbe,
4148         kernelState->dwCurbeOffset,
4149         sizeof(curbe)));
4150 
4151     return eStatus;
4152 }
4153 
SendMbEncSurfacesIKernel(PMOS_COMMAND_BUFFER cmdBuffer)4154 MOS_STATUS CodechalEncHevcStateG12::SendMbEncSurfacesIKernel(
4155     PMOS_COMMAND_BUFFER cmdBuffer)
4156 {
4157     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4158 
4159     uint32_t                               startBTI = 0, mbenc_I_KRNIDX = MBENC_LCU32_KRNIDX;
4160     CODECHAL_SURFACE_CODEC_PARAMS          surfaceCodecParams;
4161     PMOS_SURFACE                           inputSurface = m_rawSurfaceToEnc;
4162     PMHW_KERNEL_STATE                      kernelState  = &m_mbEncKernelStates[mbenc_I_KRNIDX];
4163     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_mbEncKernelBindingTable[mbenc_I_KRNIDX];
4164 
4165     // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map
4166     startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
4167     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4168         &surfaceCodecParams,
4169         &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4170         m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4171         0,
4172         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4173         bindingTable->dwBindingTableEntries[startBTI++],
4174         false));
4175 
4176     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4177         m_hwInterface,
4178         cmdBuffer,
4179         &surfaceCodecParams,
4180         kernelState));
4181 
4182     CODECHAL_DEBUG_TOOL(
4183         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4184             &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4185             CodechalDbgAttr::attrOutput,
4186             "Hevc_CombinedBuffer1",
4187             m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4188             0,
4189             CODECHAL_MEDIA_STATE_HEVC_I_MBENC)););
4190 
4191     // VME surfaces
4192     startBTI = 0;
4193     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4194         &surfaceCodecParams,
4195         inputSurface,
4196         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4197         bindingTable->dwBindingTableEntries[startBTI++]));
4198 
4199     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4200         m_hwInterface,
4201         cmdBuffer,
4202         &surfaceCodecParams,
4203         kernelState));
4204 
4205     // Programming dummy surfaces even if not used (VME requirement), currently setting to input surface
4206     for (int32_t surface_idx = 0; surface_idx < 8; surface_idx++)
4207     {
4208         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4209             &surfaceCodecParams,
4210             inputSurface,
4211             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4212             bindingTable->dwBindingTableEntries[startBTI++]));
4213 
4214         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4215             m_hwInterface,
4216             cmdBuffer,
4217             &surfaceCodecParams,
4218             kernelState));
4219     }
4220 
4221     //Source Y and UV
4222     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4223         &surfaceCodecParams,
4224         inputSurface,
4225         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4226         bindingTable->dwBindingTableEntries[startBTI++],
4227         m_verticalLineStride,
4228         false));
4229 
4230     surfaceCodecParams.bUseUVPlane = true;
4231 
4232     surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI++];
4233     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4234         m_hwInterface,
4235         cmdBuffer,
4236         &surfaceCodecParams,
4237         kernelState));
4238 
4239     CODECHAL_DEBUG_TOOL(
4240         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4241             inputSurface,
4242             CodechalDbgAttr::attrEncodeRawInputSurface,
4243             "MbEnc_Input_SrcSurf")));
4244     // Current Y with reconstructed boundary pixels
4245     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4246         &surfaceCodecParams,
4247         &m_currPicWithReconBoundaryPix,
4248         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4249         bindingTable->dwBindingTableEntries[startBTI++],
4250         m_verticalLineStride,
4251         true));
4252 
4253     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4254         m_hwInterface,
4255         cmdBuffer,
4256         &surfaceCodecParams,
4257         kernelState));
4258 
4259     // Enc CU Record
4260     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4261         &surfaceCodecParams,
4262         &m_intermediateCuRecordSurfaceLcu32,
4263         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4264         bindingTable->dwBindingTableEntries[startBTI++],
4265         m_verticalLineStride,
4266         true));
4267 
4268     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4269         m_hwInterface,
4270         cmdBuffer,
4271         &surfaceCodecParams,
4272         kernelState));
4273 
4274     // PAK object command surface
4275     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4276         &surfaceCodecParams,
4277         &m_resMbCodeSurface,
4278         m_mvOffset,
4279         0,
4280         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4281         bindingTable->dwBindingTableEntries[startBTI++],
4282         true));
4283 
4284     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4285         m_hwInterface,
4286         cmdBuffer,
4287         &surfaceCodecParams,
4288         kernelState));
4289 
4290     // CU packet for PAK surface
4291     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4292         &surfaceCodecParams,
4293         &m_resMbCodeSurface,
4294         m_mbCodeSize - m_mvOffset,
4295         m_mvOffset,
4296         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4297         bindingTable->dwBindingTableEntries[startBTI++],
4298         true));
4299 
4300     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4301         m_hwInterface,
4302         cmdBuffer,
4303         &surfaceCodecParams,
4304         kernelState));
4305 
4306     //Software scoreboard surface
4307     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4308         &surfaceCodecParams,
4309         m_swScoreboardState->GetCurSwScoreboardSurface(),
4310         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4311         bindingTable->dwBindingTableEntries[startBTI++],
4312         m_verticalLineStride,
4313         true));
4314 
4315     surfaceCodecParams.bUse32UINTSurfaceFormat = true;
4316     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4317         m_hwInterface,
4318         cmdBuffer,
4319         &surfaceCodecParams,
4320         kernelState));
4321 
4322     // Scratch surface for Internal Use Only
4323     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4324         &surfaceCodecParams,
4325         &m_scratchSurface,
4326         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4327         bindingTable->dwBindingTableEntries[startBTI++],
4328         m_verticalLineStride,
4329         true));
4330 
4331     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4332         m_hwInterface,
4333         cmdBuffer,
4334         &surfaceCodecParams,
4335         kernelState));
4336 
4337     // CU 16x16 QP data input surface
4338     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4339         &surfaceCodecParams,
4340         &m_brcBuffers.sBrcMbQpBuffer,
4341         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4342         bindingTable->dwBindingTableEntries[startBTI++],
4343         m_verticalLineStride,
4344         false));
4345 
4346     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4347         m_hwInterface,
4348         cmdBuffer,
4349         &surfaceCodecParams,
4350         kernelState));
4351 
4352     // Lcu level data input
4353     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4354         &surfaceCodecParams,
4355         &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
4356         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4357         bindingTable->dwBindingTableEntries[startBTI++],
4358         m_verticalLineStride,
4359         false));
4360 
4361     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4362         m_hwInterface,
4363         cmdBuffer,
4364         &surfaceCodecParams,
4365         kernelState));
4366 
4367     // Enc I Constant Table surface // CostLUT Buf
4368     startBTI = MBENC_I_FRAME_ENC_CONST_TABLE;
4369     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4370         &surfaceCodecParams,
4371         &m_encConstantTableForB.sResource,
4372         m_encConstantTableForB.dwSize,
4373         0,
4374         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4375         bindingTable->dwBindingTableEntries[startBTI++],
4376         false));
4377 
4378     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4379         m_hwInterface,
4380         cmdBuffer,
4381         &surfaceCodecParams,
4382         kernelState));
4383 
4384 #if 0
4385     // Concurrent Thread Group Data surface
4386     startBTI = MBENC_I_FRAME_CONCURRENT_TG_DATA;
4387     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4388         &surfaceCodecParams,
4389         &resConcurrentThreadGroupData.sResource,
4390         resConcurrentThreadGroupData.dwSize,
4391         0,
4392         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4393         bindingTable->dwBindingTableEntries[startBTI++],
4394         false));
4395 
4396     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4397         m_hwInterface,
4398         cmdBuffer,
4399         &surfaceCodecParams,
4400         kernelState));
4401 #endif
4402 
4403     // Brc Combined Enc parameter surface
4404     startBTI = MBENC_I_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
4405     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4406         &surfaceCodecParams,
4407         &m_brcInputForEncKernelBuffer->sResource,
4408         HEVC_FRAMEBRC_BUF_CONST_SIZE,
4409         0,
4410         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4411         bindingTable->dwBindingTableEntries[startBTI++],
4412         false));
4413 
4414     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4415         m_hwInterface,
4416         cmdBuffer,
4417         &surfaceCodecParams,
4418         kernelState));
4419 
4420     // Kernel debug surface
4421     startBTI = MBENC_I_FRAME_DEBUG_DUMP;
4422     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4423         &surfaceCodecParams,
4424         &m_debugSurface[0].sResource,
4425         m_debugSurface[0].dwSize,
4426         0,
4427         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4428         bindingTable->dwBindingTableEntries[startBTI++],
4429         false));
4430 
4431     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4432         m_hwInterface,
4433         cmdBuffer,
4434         &surfaceCodecParams,
4435         kernelState));
4436 
4437     return eStatus;
4438 }
4439 
SendMbEncSurfacesBKernel(PMOS_COMMAND_BUFFER cmdBuffer)4440 MOS_STATUS CodechalEncHevcStateG12::SendMbEncSurfacesBKernel(
4441     PMOS_COMMAND_BUFFER cmdBuffer)
4442 {
4443     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4444 
4445     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
4446     PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
4447 
4448     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
4449     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = m_isMaxLcu64 ? &m_mbEncKernelBindingTable[MBENC_LCU64_KRNIDX] : &m_mbEncKernelBindingTable[MBENC_LCU32_KRNIDX];
4450 
4451     PMOS_SURFACE                  inputSurface = m_rawSurfaceToEnc;
4452     uint32_t                      startBTI     = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
4453     CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4454 
4455     // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map
4456     startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
4457     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4458         &surfaceCodecParams,
4459         &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4460         m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4461         0,
4462         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4463         bindingTable->dwBindingTableEntries[startBTI++],
4464         false));
4465 
4466     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4467         m_hwInterface,
4468         cmdBuffer,
4469         &surfaceCodecParams,
4470         kernelState));
4471 
4472     CODECHAL_DEBUG_TOOL(
4473         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4474             &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4475             CodechalDbgAttr::attrOutput,
4476             "Hevc_CombinedBuffer1",
4477             m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4478             0,
4479             CODECHAL_MEDIA_STATE_HEVC_B_MBENC)););
4480     // Combined 1D buffer 2, which contains non fixed sizes of buffers
4481     startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
4482     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4483         &surfaceCodecParams,
4484         &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
4485         m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
4486         0,
4487         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4488         bindingTable->dwBindingTableEntries[startBTI++],
4489         false));
4490     surfaceCodecParams.bRawSurface = true;
4491 
4492     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4493         m_hwInterface,
4494         cmdBuffer,
4495         &surfaceCodecParams,
4496         kernelState));
4497 
4498     CODECHAL_DEBUG_TOOL(
4499         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4500             &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
4501             CodechalDbgAttr::attrOutput,
4502             "Hevc_CombinedBuffer2",
4503             m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
4504             0,
4505             CODECHAL_MEDIA_STATE_HEVC_B_MBENC)););
4506     // VME surfaces
4507     startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
4508     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4509         &surfaceCodecParams,
4510         inputSurface,
4511         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4512         bindingTable->dwBindingTableEntries[startBTI++]));
4513 
4514     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4515         m_hwInterface,
4516         cmdBuffer,
4517         &surfaceCodecParams,
4518         kernelState));
4519 
4520     for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
4521     {
4522         int32_t       ll     = 0;
4523         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4524         if (!CodecHal_PictureIsInvalid(refPic) &&
4525             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4526         {
4527             int32_t      idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4528             PMOS_SURFACE refSurfacePtr;
4529             if (surface_idx == 0 && m_useWeightedSurfaceForL0)
4530             {
4531                 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx);
4532             }
4533             else
4534             {
4535                 refSurfacePtr = &m_refList[idx]->sRefBuffer;
4536             }
4537 
4538             // Picture Y VME
4539             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4540                 &surfaceCodecParams,
4541                 refSurfacePtr,
4542                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4543                 bindingTable->dwBindingTableEntries[startBTI++]));
4544 
4545             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4546                 m_hwInterface,
4547                 cmdBuffer,
4548                 &surfaceCodecParams,
4549                 kernelState));
4550 
4551             CODECHAL_DEBUG_TOOL(
4552                 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4553                 std::string refSurfName      = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4554                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4555                     &m_refList[idx]->sRefBuffer,
4556                     CodechalDbgAttr::attrReferenceSurfaces,
4557                     refSurfName.data())));
4558         }
4559         else
4560         {
4561             // Providing Dummy surface as per VME requirement.
4562             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4563                 &surfaceCodecParams,
4564                 inputSurface,
4565                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4566                 bindingTable->dwBindingTableEntries[startBTI++]));
4567 
4568             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4569                 m_hwInterface,
4570                 cmdBuffer,
4571                 &surfaceCodecParams,
4572                 kernelState));
4573         }
4574 
4575         ll     = 1;
4576         refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4577         if (!CodecHal_PictureIsInvalid(refPic) &&
4578             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4579         {
4580             int32_t      idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4581             PMOS_SURFACE refSurfacePtr;
4582             if (surface_idx == 0 && m_useWeightedSurfaceForL1)
4583             {
4584                 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx);
4585             }
4586             else
4587             {
4588                 refSurfacePtr = &m_refList[idx]->sRefBuffer;
4589             }
4590 
4591             // Picture Y VME
4592             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4593                 &surfaceCodecParams,
4594                 refSurfacePtr,
4595                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4596                 bindingTable->dwBindingTableEntries[startBTI++]));
4597 
4598             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4599                 m_hwInterface,
4600                 cmdBuffer,
4601                 &surfaceCodecParams,
4602                 kernelState));
4603 
4604             CODECHAL_DEBUG_TOOL(
4605                 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4606                 std::string refSurfName      = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4607                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4608                     &m_refList[idx]->sRefBuffer,
4609                     CodechalDbgAttr::attrReferenceSurfaces,
4610                     refSurfName.data())));
4611         }
4612         else
4613         {
4614             // Providing Dummy surface as per VME requirement.
4615             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4616                 &surfaceCodecParams,
4617                 inputSurface,
4618                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4619                 bindingTable->dwBindingTableEntries[startBTI++]));
4620 
4621             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4622                 m_hwInterface,
4623                 cmdBuffer,
4624                 &surfaceCodecParams,
4625                 kernelState));
4626         }
4627     }
4628 
4629     //Source Y and UV
4630     startBTI = MBENC_B_FRAME_CURR_Y;
4631     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4632         &surfaceCodecParams,
4633         inputSurface,
4634         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4635         bindingTable->dwBindingTableEntries[startBTI++],
4636         m_verticalLineStride,
4637         false));
4638 
4639     surfaceCodecParams.bUseUVPlane = true;
4640 
4641     surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI];
4642     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4643         m_hwInterface,
4644         cmdBuffer,
4645         &surfaceCodecParams,
4646         kernelState));
4647 
4648     CODECHAL_DEBUG_TOOL(
4649         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4650             inputSurface,
4651             CodechalDbgAttr::attrEncodeRawInputSurface,
4652             "MbEnc_Input_SrcSurf")));
4653 
4654     // Current Y with reconstructed boundary pixels
4655     startBTI = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
4656     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4657         &surfaceCodecParams,
4658         &m_currPicWithReconBoundaryPix,
4659         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4660         bindingTable->dwBindingTableEntries[startBTI],
4661         m_verticalLineStride,
4662         true));
4663 
4664     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4665         m_hwInterface,
4666         cmdBuffer,
4667         &surfaceCodecParams,
4668         kernelState));
4669 
4670     // Enc CU Record
4671     startBTI = MBENC_B_FRAME_ENC_CU_RECORD;
4672     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4673         &surfaceCodecParams,
4674         &m_intermediateCuRecordSurfaceLcu32,
4675         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4676         bindingTable->dwBindingTableEntries[startBTI],
4677         0,
4678         true));
4679 
4680     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4681         m_hwInterface,
4682         cmdBuffer,
4683         &surfaceCodecParams,
4684         kernelState));
4685 
4686     // PAK object command surface
4687     startBTI = MBENC_B_FRAME_PAK_OBJ;
4688     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4689         &surfaceCodecParams,
4690         &m_resMbCodeSurface,
4691         m_mvOffset,
4692         0,
4693         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4694         bindingTable->dwBindingTableEntries[startBTI],
4695         true));
4696 
4697     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4698         m_hwInterface,
4699         cmdBuffer,
4700         &surfaceCodecParams,
4701         kernelState));
4702 
4703     // CU packet for PAK surface
4704     startBTI = MBENC_B_FRAME_PAK_CU_RECORD;
4705     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4706         &surfaceCodecParams,
4707         &m_resMbCodeSurface,
4708         m_mbCodeSize - m_mvOffset,
4709         m_mvOffset,
4710         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4711         bindingTable->dwBindingTableEntries[startBTI],
4712         true));
4713 
4714     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4715         m_hwInterface,
4716         cmdBuffer,
4717         &surfaceCodecParams,
4718         kernelState));
4719 
4720     //Software scoreboard surface
4721     startBTI = MBENC_B_FRAME_SW_SCOREBOARD;
4722     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4723         &surfaceCodecParams,
4724         m_swScoreboardState->GetCurSwScoreboardSurface(),
4725         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4726         bindingTable->dwBindingTableEntries[startBTI],
4727         m_verticalLineStride,
4728         true));
4729 
4730     surfaceCodecParams.bUse32UINTSurfaceFormat = true;
4731     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4732         m_hwInterface,
4733         cmdBuffer,
4734         &surfaceCodecParams,
4735         kernelState));
4736 
4737     // Scratch surface for Internal Use Only
4738     startBTI = MBENC_B_FRAME_SCRATCH_SURFACE;
4739     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4740         &surfaceCodecParams,
4741         &m_scratchSurface,
4742         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4743         bindingTable->dwBindingTableEntries[startBTI],
4744         m_verticalLineStride,
4745         true));
4746 
4747     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4748         m_hwInterface,
4749         cmdBuffer,
4750         &surfaceCodecParams,
4751         kernelState));
4752 
4753     // CU 16x16 QP data input surface
4754     startBTI = MBENC_B_FRAME_CU_QP_DATA;
4755     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4756         &surfaceCodecParams,
4757         &m_brcBuffers.sBrcMbQpBuffer,
4758         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4759         bindingTable->dwBindingTableEntries[startBTI],
4760         m_verticalLineStride,
4761         false));
4762 
4763     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4764         m_hwInterface,
4765         cmdBuffer,
4766         &surfaceCodecParams,
4767         kernelState));
4768 
4769     // Lcu level data input
4770     startBTI = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
4771     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4772         &surfaceCodecParams,
4773         &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
4774         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4775         bindingTable->dwBindingTableEntries[startBTI],
4776         m_verticalLineStride,
4777         false));
4778 
4779     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4780         m_hwInterface,
4781         cmdBuffer,
4782         &surfaceCodecParams,
4783         kernelState));
4784 
4785     // Enc B 32x32 Constant Table surface
4786     startBTI = MBENC_B_FRAME_ENC_CONST_TABLE;
4787     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4788         &surfaceCodecParams,
4789         &m_encConstantTableForB.sResource,
4790         m_encConstantTableForB.dwSize,
4791         0,
4792         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4793         bindingTable->dwBindingTableEntries[startBTI],
4794         false));
4795 
4796     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4797         m_hwInterface,
4798         cmdBuffer,
4799         &surfaceCodecParams,
4800         kernelState));
4801 
4802     // Colocated CU Motion Vector Data surface
4803     startBTI                    = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
4804     uint8_t mbCodeIdxForTempMVP = 0xFF;
4805     if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
4806     {
4807         uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
4808 
4809         mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
4810     }
4811 
4812     if (m_pictureCodingType == I_TYPE)
4813     {
4814         // No temoporal MVP in the I frame
4815         m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4816     }
4817     else
4818     {
4819         if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
4820         {
4821             // Temporal reference MV index is invalid and so disable the temporal MVP
4822             CODECHAL_ENCODE_ASSERT(false);
4823             m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4824         }
4825     }
4826 
4827     if (mbCodeIdxForTempMVP == 0xFF)
4828     {
4829         startBTI++;
4830     }
4831     else
4832     {
4833         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4834             &surfaceCodecParams,
4835             m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP),
4836             m_sizeOfMvTemporalBuffer,
4837             0,
4838             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
4839             bindingTable->dwBindingTableEntries[startBTI++],
4840             false));
4841 
4842         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4843             m_hwInterface,
4844             cmdBuffer,
4845             &surfaceCodecParams,
4846             kernelState));
4847     }
4848 
4849     startBTI = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
4850 
4851     // HME motion predictor data
4852     if (m_hmeEnabled)
4853     {
4854         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4855             &surfaceCodecParams,
4856             m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer),
4857             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
4858             bindingTable->dwBindingTableEntries[startBTI++],
4859             m_verticalLineStride,
4860             false));
4861 
4862         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4863             m_hwInterface,
4864             cmdBuffer,
4865             &surfaceCodecParams,
4866             kernelState));
4867     }
4868     else
4869     {
4870         startBTI++;
4871     }
4872 
4873     // Brc Combined Enc parameter surface
4874     startBTI = MBENC_B_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
4875     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4876         &surfaceCodecParams,
4877         &m_brcInputForEncKernelBuffer->sResource,
4878         HEVC_FRAMEBRC_BUF_CONST_SIZE,
4879         0,
4880         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4881         bindingTable->dwBindingTableEntries[startBTI++],
4882         false));
4883 
4884     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4885         m_hwInterface,
4886         cmdBuffer,
4887         &surfaceCodecParams,
4888         kernelState));
4889 
4890     startBTI = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
4891     if (m_isMaxLcu64)
4892     {
4893         PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4894 
4895         //VME 2X Inter prediction surface for current frame
4896         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4897             &surfaceCodecParams,
4898             currScaledSurface2x,
4899             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4900             bindingTable->dwBindingTableEntries[startBTI++]));
4901 
4902         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4903             m_hwInterface,
4904             cmdBuffer,
4905             &surfaceCodecParams,
4906             kernelState));
4907 
4908         CODECHAL_DEBUG_TOOL(
4909             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4910                 currScaledSurface2x,
4911                 CodechalDbgAttr::attrReferenceSurfaces,
4912                 "2xScaledSurf")));
4913 
4914         // RefFrame's 2x DS surface
4915         for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
4916         {
4917             int32_t       ll     = 0;
4918             CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4919             if (!CodecHal_PictureIsInvalid(refPic) &&
4920                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4921             {
4922                 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4923 
4924                 // Picture Y VME
4925                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4926                     &surfaceCodecParams,
4927                     m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4928                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4929                     bindingTable->dwBindingTableEntries[startBTI++]));
4930 
4931                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4932                     m_hwInterface,
4933                     cmdBuffer,
4934                     &surfaceCodecParams,
4935                     kernelState));
4936 
4937                 CODECHAL_DEBUG_TOOL(
4938                     m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4939                     std::string refSurfName      = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4940                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4941                         m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4942                         CodechalDbgAttr::attrReferenceSurfaces,
4943                         refSurfName.data())));
4944             }
4945             else
4946             {
4947                 // Providing Dummy surface as per VME requirement.
4948                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4949                     &surfaceCodecParams,
4950                     currScaledSurface2x,
4951                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4952                     bindingTable->dwBindingTableEntries[startBTI++]));
4953 
4954                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4955                     m_hwInterface,
4956                     cmdBuffer,
4957                     &surfaceCodecParams,
4958                     kernelState));
4959             }
4960 
4961             ll     = 1;
4962             refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4963             if (!CodecHal_PictureIsInvalid(refPic) &&
4964                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4965             {
4966                 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4967 
4968                 // Picture Y VME
4969                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4970                     &surfaceCodecParams,
4971                     m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4972                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4973                     bindingTable->dwBindingTableEntries[startBTI++]));
4974 
4975                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4976                     m_hwInterface,
4977                     cmdBuffer,
4978                     &surfaceCodecParams,
4979                     kernelState));
4980 
4981                 CODECHAL_DEBUG_TOOL(
4982                     m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4983                     std::string refSurfName      = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4984                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4985                         m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4986                         CodechalDbgAttr::attrReferenceSurfaces,
4987                         refSurfName.data())));
4988             }
4989             else
4990             {
4991                 // Providing Dummy surface as per VME requirement.
4992                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4993                     &surfaceCodecParams,
4994                     currScaledSurface2x,
4995                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4996                     bindingTable->dwBindingTableEntries[startBTI++]));
4997 
4998                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4999                     m_hwInterface,
5000                     cmdBuffer,
5001                     &surfaceCodecParams,
5002                     kernelState));
5003             }
5004         }
5005     }
5006 
5007     // Encoder History Input Buffer
5008     startBTI = MBENC_B_FRAME_ENCODER_HISTORY_INPUT_BUFFER;
5009     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5010         &surfaceCodecParams,
5011         &m_encoderHistoryInputBuffer,
5012         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5013         bindingTable->dwBindingTableEntries[startBTI++],
5014         m_verticalLineStride,
5015         true));
5016 
5017     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5018         m_hwInterface,
5019         cmdBuffer,
5020         &surfaceCodecParams,
5021         kernelState));
5022 
5023     // Encoder History Output Buffer
5024     startBTI = MBENC_B_FRAME_ENCODER_HISTORY_OUTPUT_BUFFER;
5025     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5026         &surfaceCodecParams,
5027         &m_encoderHistoryOutputBuffer,
5028         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5029         bindingTable->dwBindingTableEntries[startBTI++],
5030         m_verticalLineStride,
5031         true));
5032 
5033     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5034         m_hwInterface,
5035         cmdBuffer,
5036         &surfaceCodecParams,
5037         kernelState));
5038 
5039     // Kernel debug surface
5040     startBTI = MBENC_B_FRAME_DEBUG_SURFACE;
5041     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++, startBTI++)
5042     {
5043         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5044             &surfaceCodecParams,
5045             &m_debugSurface[i].sResource,
5046             m_debugSurface[i].dwSize,
5047             0,
5048             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5049             bindingTable->dwBindingTableEntries[startBTI],
5050             false));
5051 
5052         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5053             m_hwInterface,
5054             cmdBuffer,
5055             &surfaceCodecParams,
5056             kernelState));
5057     }
5058 
5059     return eStatus;
5060 }
5061 
SendBrcInitResetSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,CODECHAL_HEVC_BRC_KRNIDX krnIdx)5062 MOS_STATUS CodechalEncHevcStateG12::SendBrcInitResetSurfaces(
5063     PMOS_COMMAND_BUFFER      cmdBuffer,
5064     CODECHAL_HEVC_BRC_KRNIDX krnIdx)
5065 {
5066     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5067 
5068     if (krnIdx != CODECHAL_HEVC_BRC_INIT && krnIdx != CODECHAL_HEVC_BRC_RESET)
5069     {
5070         CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
5071         return MOS_STATUS_INVALID_PARAMETER;
5072     }
5073 
5074     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[krnIdx];
5075     uint32_t                               startBti     = 0;
5076     CODECHAL_SURFACE_CODEC_PARAMS          surfaceCodecParams;
5077     // BRC History Buffer
5078     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5079         &surfaceCodecParams,
5080         &m_brcBuffers.resBrcHistoryBuffer,
5081         m_brcHistoryBufferSize,
5082         0,
5083         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5084         bindingTable->dwBindingTableEntries[startBti++],
5085         true));
5086 
5087     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[krnIdx];
5088     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5089         m_hwInterface,
5090         cmdBuffer,
5091         &surfaceCodecParams,
5092         kernelState));
5093 
5094     // BRC Distortion Surface
5095     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5096         &surfaceCodecParams,
5097         m_brcDistortion,
5098         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
5099         bindingTable->dwBindingTableEntries[startBti++],
5100         0,
5101         true));
5102     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5103         m_hwInterface,
5104         cmdBuffer,
5105         &surfaceCodecParams,
5106         kernelState));
5107 
5108     return eStatus;
5109 }
5110 
SetupBrcConstantTable(PMOS_SURFACE brcConstantData)5111 MOS_STATUS CodechalEncHevcStateG12::SetupBrcConstantTable(
5112     PMOS_SURFACE brcConstantData)
5113 {
5114     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5115 
5116     CODECHAL_ENCODE_FUNCTION_ENTER;
5117 
5118     MOS_LOCK_PARAMS lockFlags;
5119     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5120     lockFlags.WriteOnly = 1;
5121     uint8_t *outputData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags);
5122     CODECHAL_ENCODE_CHK_NULL_RETURN(outputData);
5123     uint8_t *inputData  = (uint8_t *)g_cInit_HEVC_BRC_QP_ADJUST;
5124     uint32_t inputSize  = sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
5125     uint32_t outputSize = brcConstantData->dwHeight * brcConstantData->dwPitch;
5126 
5127     // 576-byte of Qp adjust table
5128     while ((inputSize >= brcConstantData->dwWidth) && (outputSize >= brcConstantData->dwWidth))
5129     {
5130         MOS_SecureMemcpy(outputData, outputSize, inputData, brcConstantData->dwWidth);
5131         outputData += brcConstantData->dwPitch;
5132         outputSize -= brcConstantData->dwPitch;
5133         inputData += brcConstantData->dwWidth;
5134         inputSize -= brcConstantData->dwWidth;
5135     }
5136     //lambda and mode cost
5137     if (m_isMaxLcu64)
5138     {
5139         inputData = (uint8_t *)m_brcLcu64x64LambdaModeCostInit;
5140         inputSize = sizeof(m_brcLcu64x64LambdaModeCostInit);
5141     }
5142     else
5143     {
5144         inputData = (uint8_t *)m_brcLcu32x32LambdaModeCostInit;
5145         inputSize = sizeof(m_brcLcu32x32LambdaModeCostInit);
5146     }
5147 
5148     while ((inputSize >= brcConstantData->dwWidth) && (outputSize >= brcConstantData->dwWidth))
5149     {
5150         MOS_SecureMemcpy(outputData, outputSize, inputData, brcConstantData->dwWidth);
5151         outputData += brcConstantData->dwPitch;
5152         outputSize -= brcConstantData->dwPitch;
5153         inputData += brcConstantData->dwWidth;
5154         inputSize -= brcConstantData->dwWidth;
5155     }
5156 
5157     m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);
5158 
5159     return eStatus;
5160 }
5161 
SendBrcFrameUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)5162 MOS_STATUS CodechalEncHevcStateG12::SendBrcFrameUpdateSurfaces(
5163     PMOS_COMMAND_BUFFER cmdBuffer)
5164 {
5165     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5166 
5167     // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
5168     PMOS_RESOURCE            brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
5169     MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
5170     mhwHevcPicState.pHevcEncSeqParams     = m_hevcSeqParams;
5171     mhwHevcPicState.pHevcEncPicParams     = m_hevcPicParams;
5172     mhwHevcPicState.bUseVDEnc             = m_vdencEnabled ? 1 : 0;
5173     mhwHevcPicState.brcNumPakPasses       = m_mfxInterface->GetBrcNumPakPasses();
5174     mhwHevcPicState.sseEnabledInVmeEncode = m_sseEnabled;
5175     mhwHevcPicState.rhodomainRCEnable     = m_brcEnabled && (m_numPipe > 1);
5176     mhwHevcPicState.bSAOEnable            = m_hevcSeqParams->SAO_enabled_flag ? (m_hevcSliceParams->slice_sao_luma_flag || m_hevcSliceParams->slice_sao_chroma_flag) : 0;
5177     mhwHevcPicState.bTransformSkipEnable  = m_hevcPicParams->transform_skip_enabled_flag;
5178     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
5179 
5180     PMOS_SURFACE brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
5181     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
5182 
5183     uint32_t                               startBti     = 0;
5184     PMHW_KERNEL_STATE                      kernelState  = &m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE];
5185     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_FRAME_UPDATE];
5186     CODECHAL_SURFACE_CODEC_PARAMS          surfaceCodecParams;
5187 
5188     // BRC History Buffer
5189     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5190         &surfaceCodecParams,
5191         &m_brcBuffers.resBrcHistoryBuffer,
5192         m_brcHistoryBufferSize,
5193         0,
5194         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5195         bindingTable->dwBindingTableEntries[startBti++],
5196         true));
5197     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5198         m_hwInterface,
5199         cmdBuffer,
5200         &surfaceCodecParams,
5201         kernelState));
5202 
5203     // BRC Prev PAK statistics output buffer
5204     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5205         &surfaceCodecParams,
5206         &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
5207         m_hevcBrcPakStatisticsSize,
5208         0,
5209         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5210         bindingTable->dwBindingTableEntries[startBti++],
5211         false));
5212     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5213         m_hwInterface,
5214         cmdBuffer,
5215         &surfaceCodecParams,
5216         kernelState));
5217 
5218     // BRC HCP_PIC_STATE read
5219     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5220         &surfaceCodecParams,
5221         brcHcpStateReadBuffer,
5222         m_brcBuffers.dwBrcHcpPicStateSize,
5223         0,
5224         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5225         bindingTable->dwBindingTableEntries[startBti++],
5226         false));
5227     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5228         m_hwInterface,
5229         cmdBuffer,
5230         &surfaceCodecParams,
5231         kernelState));
5232 
5233     // BRC HCP_PIC_STATE write
5234     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5235         &surfaceCodecParams,
5236         &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
5237         m_brcBuffers.dwBrcHcpPicStateSize,
5238         0,
5239         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5240         bindingTable->dwBindingTableEntries[startBti++],
5241         true));
5242     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5243         m_hwInterface,
5244         cmdBuffer,
5245         &surfaceCodecParams,
5246         kernelState));
5247 
5248     // Combined ENC-parameter buffer
5249     startBti++;
5250 
5251     // BRC Distortion Surface
5252     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5253         &surfaceCodecParams,
5254         m_brcDistortion,
5255         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
5256         bindingTable->dwBindingTableEntries[startBti++],
5257         0,
5258         true));
5259     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5260         m_hwInterface,
5261         cmdBuffer,
5262         &surfaceCodecParams,
5263         kernelState));
5264 
5265     // BRC Data Surface
5266     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5267         &surfaceCodecParams,
5268         brcConstantData,
5269         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5270         bindingTable->dwBindingTableEntries[startBti++],
5271         0,
5272         false));
5273     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5274         m_hwInterface,
5275         cmdBuffer,
5276         &surfaceCodecParams,
5277         kernelState));
5278 
5279     // Pixel MB Statistics surface
5280     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5281         &surfaceCodecParams,
5282         &m_resMbStatsBuffer,
5283         m_hwInterface->m_avcMbStatBufferSize,
5284         0,
5285         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5286         bindingTable->dwBindingTableEntries[startBti++],
5287         false));
5288     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5289         m_hwInterface,
5290         cmdBuffer,
5291         &surfaceCodecParams,
5292         kernelState));
5293 
5294     // Mv and Distortion summation surface
5295     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5296         &surfaceCodecParams,
5297         &m_mvAndDistortionSumSurface.sResource,
5298         m_mvAndDistortionSumSurface.dwSize,
5299         0,
5300         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5301         bindingTable->dwBindingTableEntries[startBti++],
5302         false));
5303     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5304         m_hwInterface,
5305         cmdBuffer,
5306         &surfaceCodecParams,
5307         kernelState));
5308 
5309     CODECHAL_DEBUG_TOOL(
5310         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5311             &m_mvAndDistortionSumSurface.sResource,
5312             CodechalDbgAttr::attrInput,
5313             "MvDistSum",
5314             m_mvAndDistortionSumSurface.dwSize,
5315             0,
5316             CODECHAL_MEDIA_STATE_BRC_UPDATE));
5317 
5318         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5319             &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
5320             CodechalDbgAttr::attrInput,
5321             "ImgStateRead",
5322             BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
5323             0,
5324             CODECHAL_MEDIA_STATE_BRC_UPDATE));
5325 
5326         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5327             &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
5328             CodechalDbgAttr::attrInput,
5329             "ConstData",
5330             CODECHAL_MEDIA_STATE_BRC_UPDATE));
5331 
5332         // PAK statistics buffer is only dumped for BrcUpdate kernel input
5333         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5334             &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
5335             CodechalDbgAttr::attrInput,
5336             "PakStats",
5337             HEVC_BRC_PAK_STATISTCS_SIZE,
5338             0,
5339             CODECHAL_MEDIA_STATE_BRC_UPDATE));
5340 
5341         // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces
5342         if (m_brcDistortion) {
5343             CODECHAL_ENCODE_CHK_STATUS_RETURN(
5344                 m_debugInterface->DumpBuffer(
5345                     &m_brcDistortion->OsResource,
5346                     CodechalDbgAttr::attrInput,
5347                     "BrcDist_BeforeFrameBRC",
5348                     m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
5349                     m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
5350                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
5351         }
5352 
5353         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcBuffers.resBrcHistoryBuffer,
5354             CodechalDbgAttr::attrInput,
5355             "HistoryRead_beforeFramBRC",
5356             m_brcHistoryBufferSize,
5357             0,
5358             CODECHAL_MEDIA_STATE_BRC_UPDATE));
5359 
5360         if (m_brcBuffers.pMbEncKernelStateInUse) {
5361             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5362                 CODECHAL_MEDIA_STATE_BRC_UPDATE,
5363                 m_brcBuffers.pMbEncKernelStateInUse));
5364         }
5365 
5366         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer,
5367             CodechalDbgAttr::attrInput,
5368             "MBStatsSurf",
5369             m_hwInterface->m_avcMbStatBufferSize,
5370             0,
5371             CODECHAL_MEDIA_STATE_BRC_UPDATE));)
5372     return eStatus;
5373 }
5374 
SendBrcLcuUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)5375 MOS_STATUS CodechalEncHevcStateG12::SendBrcLcuUpdateSurfaces(
5376     PMOS_COMMAND_BUFFER cmdBuffer)
5377 {
5378     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5379 
5380     PMHW_KERNEL_STATE                      kernelState  = &m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE];
5381     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_LCU_UPDATE];
5382     uint32_t                               startBTI     = 0;
5383     CODECHAL_SURFACE_CODEC_PARAMS          surfaceCodecParams;
5384 
5385     if (m_brcEnabled)
5386     {
5387         // BRC History Buffer
5388         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5389             &surfaceCodecParams,
5390             &m_brcBuffers.resBrcHistoryBuffer,
5391             m_brcHistoryBufferSize,
5392             0,
5393             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5394             bindingTable->dwBindingTableEntries[startBTI++],
5395             true));
5396         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5397             m_hwInterface,
5398             cmdBuffer,
5399             &surfaceCodecParams,
5400             kernelState));
5401 
5402         // BRC Distortion Surface - Intra or Inter
5403         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5404             &surfaceCodecParams,
5405             m_brcDistortion,
5406             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
5407             bindingTable->dwBindingTableEntries[startBTI++],
5408             0,
5409             true));
5410         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5411             m_hwInterface,
5412             cmdBuffer,
5413             &surfaceCodecParams,
5414             kernelState));
5415 
5416         // Pixel MB Statistics surface
5417         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5418             &surfaceCodecParams,
5419             &m_resMbStatsBuffer,
5420             m_hwInterface->m_avcMbStatBufferSize,
5421             0,
5422             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5423             bindingTable->dwBindingTableEntries[startBTI++],
5424             false));
5425         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5426             m_hwInterface,
5427             cmdBuffer,
5428             &surfaceCodecParams,
5429             kernelState));
5430     }
5431     else
5432     {
5433         // CQP ROI
5434         startBTI += 3;
5435     }
5436     // MB QP surface
5437     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5438         &surfaceCodecParams,
5439         &m_brcBuffers.sBrcMbQpBuffer,
5440         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5441         bindingTable->dwBindingTableEntries[startBTI++],
5442         0,
5443         true));
5444     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5445         m_hwInterface,
5446         cmdBuffer,
5447         &surfaceCodecParams,
5448         kernelState));
5449 
5450     // ROI surface
5451     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5452         &surfaceCodecParams,
5453         &m_brcBuffers.sBrcRoiSurface,
5454         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ROI_ENCODE].Value,
5455         bindingTable->dwBindingTableEntries[startBTI++],
5456         0,
5457         false));
5458     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5459         m_hwInterface,
5460         cmdBuffer,
5461         &surfaceCodecParams,
5462         kernelState));
5463 
5464     return eStatus;
5465 }
5466 
GetCustomDispatchPattern(PMHW_WALKER_PARAMS walkerParams,PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)5467 MOS_STATUS CodechalEncHevcStateG12::GetCustomDispatchPattern(
5468     PMHW_WALKER_PARAMS            walkerParams,
5469     PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)
5470 {
5471     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5472 
5473     CODECHAL_ENCODE_CHK_NULL_RETURN(walkerParams);
5474     CODECHAL_ENCODE_CHK_NULL_RETURN(walkerCodecParams);
5475 
5476     MOS_ZeroMemory(walkerParams, sizeof(*walkerParams));
5477 
5478     walkerParams->WalkerMode = (MHW_WALKER_MODE)walkerCodecParams->WalkerMode;
5479 
5480     walkerParams->dwLocalLoopExecCount  = 0xFFFF;  //MAX VALUE
5481     walkerParams->dwGlobalLoopExecCount = 0xFFFF;  //MAX VALUE
5482 
5483     // the following code is copied from the kernel ULT
5484     uint32_t maxThreadWidth, maxThreadHeight;
5485     uint32_t threadSpaceWidth, threadSpaceHeight, concurGroupNum, threadScaleV;
5486 
5487     threadSpaceWidth  = walkerCodecParams->dwResolutionX;
5488     threadSpaceHeight = walkerCodecParams->dwResolutionY;
5489     maxThreadWidth    = threadSpaceWidth;
5490     maxThreadHeight   = threadSpaceHeight;
5491     concurGroupNum    = m_numberConcurrentGroup;
5492     threadScaleV      = m_numberEncKernelSubThread;
5493 
5494     if (concurGroupNum > 1)
5495     {
5496         maxThreadWidth  = threadSpaceWidth;
5497         maxThreadHeight = threadSpaceWidth + (threadSpaceWidth + threadSpaceHeight + concurGroupNum - 2) / concurGroupNum;
5498         maxThreadHeight *= threadScaleV;
5499         maxThreadHeight += 1;
5500     }
5501     else
5502     {
5503         threadSpaceHeight *= threadScaleV;
5504         maxThreadHeight *= threadScaleV;
5505     }
5506 
5507     uint32_t localLoopExecCount = m_degree45Needed ? (2 * m_numWavefrontInOneRegion + 1) : m_numWavefrontInOneRegion;
5508 
5509     eStatus = InitMediaObjectWalker(maxThreadWidth,
5510         maxThreadHeight,
5511         concurGroupNum - 1,
5512         m_swScoreboardState->GetDependencyPattern(),
5513         m_numberEncKernelSubThread - 1,
5514         localLoopExecCount,
5515         *walkerParams);
5516 
5517     return eStatus;
5518 }
5519 
GenerateLcuLevelData(MOS_SURFACE & lcuLevelInputDataSurfaceParam)5520 MOS_STATUS CodechalEncHevcStateG12::GenerateLcuLevelData(MOS_SURFACE &lcuLevelInputDataSurfaceParam)
5521 {
5522     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5523 
5524     CODECHAL_ENCODE_FUNCTION_ENTER;
5525 
5526     CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
5527 
5528     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5529     uint32_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
5530 
5531     uint32_t shift    = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5532     uint32_t residual = (1 << shift) - 1;
5533 
5534     uint32_t frameWidthInLcu  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
5535     uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
5536 
5537     PLCU_LEVEL_DATA *lcuInfo = (PLCU_LEVEL_DATA *)MOS_AllocMemory(sizeof(PLCU_LEVEL_DATA) * frameWidthInLcu);
5538     CODECHAL_ENCODE_CHK_NULL_RETURN(lcuInfo);
5539     for (uint32_t i = 0; i < frameWidthInLcu; i++)
5540     {
5541         lcuInfo[i] = (PLCU_LEVEL_DATA)MOS_AllocMemory(sizeof(LCU_LEVEL_DATA) * frameHeightInLcu);
5542         if (lcuInfo[i] == nullptr)
5543         {
5544             for (uint32_t j = 0; j < i; j++)
5545             {
5546                 MOS_FreeMemory(lcuInfo[j]);
5547             }
5548             MOS_FreeMemory(lcuInfo);
5549             CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
5550         }
5551         MOS_ZeroMemory(lcuInfo[i], (sizeof(LCU_LEVEL_DATA) * frameHeightInLcu));
5552     }
5553 
5554     // Tiling case
5555     if (numTileColumns > 1 || numTileRows > 1)
5556     {
5557         // This assumes that the entire Slice is contained within a Tile
5558         for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
5559         {
5560             for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
5561             {
5562                 uint32_t                             tileId      = tileRow * numTileColumns + tileCol;
5563                 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile = m_tileParams[tileId];
5564 
5565                 uint32_t tileColumnWidth = (currentTile.TileWidthInMinCbMinus1 + 1 + residual) >> shift;
5566                 uint32_t tileRowHeight   = (currentTile.TileHeightInMinCbMinus1 + 1 + residual) >> shift;
5567 
5568                 for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
5569                 {
5570                     bool lastSliceInTile = false, sliceInTile = false;
5571 
5572                     eStatus = (MOS_STATUS)IsSliceInTile(slcCount,
5573                         &currentTile,
5574                         &sliceInTile,
5575                         &lastSliceInTile);
5576                     if (eStatus != MOS_STATUS_SUCCESS)
5577                     {
5578                         for (uint32_t i = 0; i < frameWidthInLcu; i++)
5579                         {
5580                             MOS_FreeMemory(lcuInfo[i]);
5581                         }
5582                         MOS_FreeMemory(lcuInfo);
5583                         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
5584                     }
5585 
5586                     if (!sliceInTile)
5587                     {
5588                         startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
5589                         continue;
5590                     }
5591 
5592                     sliceStartLcu      = m_hevcSliceParams[slcCount].slice_segment_address;
5593                     uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
5594                     uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
5595 
5596                     for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
5597                     {
5598                         lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex   = (uint16_t)startLCU;
5599                         lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex     = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice);  // this should be next slice start index
5600                         lcuInfo[sliceLcuX][sliceLcuY].SliceId              = (uint16_t)slcCount;
5601                         lcuInfo[sliceLcuX][sliceLcuY].TileId               = (uint16_t)tileId;
5602                         lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = (uint16_t)currentTile.TileStartLCUX;
5603                         lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = (uint16_t)currentTile.TileStartLCUY;
5604                         lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX   = (uint16_t)(currentTile.TileStartLCUX + tileColumnWidth);
5605                         lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY   = (uint16_t)(currentTile.TileStartLCUY + tileRowHeight);
5606 
5607                         sliceLcuX++;
5608 
5609                         if (sliceLcuX >= currentTile.TileStartLCUX + tileColumnWidth)
5610                         {
5611                             sliceLcuX = currentTile.TileStartLCUX;
5612                             sliceLcuY++;
5613                         }
5614                     }
5615                     startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
5616                 }
5617             }
5618         }
5619     }
5620     else  // non-tiling case
5621     {
5622         for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
5623         {
5624             sliceStartLcu      = m_hevcSliceParams[slcCount].slice_segment_address;
5625             uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
5626             uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
5627 
5628             for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
5629             {
5630                 lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex   = (uint16_t)startLCU;
5631                 lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex     = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice);  // this should be next slice start index
5632                 lcuInfo[sliceLcuX][sliceLcuY].SliceId              = (uint16_t)slcCount;
5633                 lcuInfo[sliceLcuX][sliceLcuY].TileId               = 0;
5634                 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = 0;
5635                 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = 0;
5636                 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX   = (uint16_t)frameWidthInLcu;
5637                 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY   = (uint16_t)frameHeightInLcu;
5638 
5639                 sliceLcuX++;
5640 
5641                 if (sliceLcuX >= frameWidthInLcu)
5642                 {
5643                     sliceLcuX = 0;
5644                     sliceLcuY++;
5645                 }
5646             }
5647             startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
5648         }
5649     }
5650 
5651     // Write LCU Info to the surface
5652     if (!Mos_ResourceIsNull(&lcuLevelInputDataSurfaceParam.OsResource))
5653     {
5654         MOS_LOCK_PARAMS lockFlags;
5655         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5656         lockFlags.WriteOnly          = 1;
5657         PLCU_LEVEL_DATA lcuLevelData = (PLCU_LEVEL_DATA)m_osInterface->pfnLockResource(
5658             m_osInterface,
5659             &lcuLevelInputDataSurfaceParam.OsResource,
5660             &lockFlags);
5661         if (lcuLevelData == nullptr)
5662         {
5663             for (uint32_t i = 0; i < frameWidthInLcu; i++)
5664             {
5665                 MOS_FreeMemory(lcuInfo[i]);
5666             }
5667             MOS_FreeMemory(lcuInfo);
5668             CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
5669         }
5670 
5671         uint8_t *dataRowStart = (uint8_t *)lcuLevelData;
5672 
5673         for (uint32_t sliceLcuY = 0; sliceLcuY < frameHeightInLcu; sliceLcuY++)
5674         {
5675             for (uint32_t sliceLcuX = 0; sliceLcuX < frameWidthInLcu; sliceLcuX++)
5676             {
5677                 *(lcuLevelData) = lcuInfo[sliceLcuX][sliceLcuY];
5678 
5679                 if ((sliceLcuX + 1) == frameWidthInLcu)
5680                 {
5681                     dataRowStart += lcuLevelInputDataSurfaceParam.dwPitch;
5682                     lcuLevelData = (PLCU_LEVEL_DATA)dataRowStart;
5683                 }
5684                 else
5685                 {
5686                     lcuLevelData++;
5687                 }
5688             }
5689         }
5690 
5691         m_osInterface->pfnUnlockResource(
5692             m_osInterface,
5693             &lcuLevelInputDataSurfaceParam.OsResource);
5694     }
5695     else
5696     {
5697         eStatus = MOS_STATUS_NULL_POINTER;
5698         CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
5699     }
5700 
5701     // Freeing the temporarily allocated memory
5702     if (lcuInfo)
5703     {
5704         for (uint32_t i = 0; i < frameWidthInLcu; i++)
5705         {
5706             MOS_FreeMemory(lcuInfo[i]);
5707         }
5708         MOS_FreeMemory(lcuInfo);
5709     }
5710     return eStatus;
5711 }
5712 
//!
//! \brief    Helper class describing one node of a coding quadtree.
//! \details  A node is a square block of side (1 << ctbLog2Size) >> level whose top-left
//!           corner sits at (m_x, m_y). A node that straddles the right or bottom edge of
//!           the coded picture is subdivided into four equally sized children, recursively.
//!
class QuadTreeNode
{
    friend class QuadTree;

public:
    //!
    //! \param    [in] ctb
    //!           Root node of the CTB this node belongs to (stored, never dereferenced here)
    //! \param    [in] x
    //!           Horizontal position of the block's top-left corner
    //! \param    [in] y
    //!           Vertical position of the block's top-left corner
    //! \param    [in] level
    //!           Depth of the node inside the CTB (0 is the CTB itself)
    //! \param    [in] ctbLog2Size
    //!           log2 of the CTB side length
    //!
    QuadTreeNode(const QuadTreeNode *ctb, uint32_t x, uint32_t y, uint32_t level, uint32_t ctbLog2Size)
        : m_ctb(ctb),
          m_x(x),
          m_y(y),
          m_level(level),
          m_size((1 << ctbLog2Size) >> level),
          m_ctbLog2Size(ctbLog2Size)
    {
    }

protected:
    //! \brief    Recursively subdivide this node while it crosses the picture boundary.
    void Build(uint32_t picWidth, uint32_t picHeight)
    {
        if (!DoesBlockCrossCodedPicture(picWidth, picHeight))
        {
            return;  // block is entirely inside or entirely outside: keep it as a leaf
        }

        CreateCUs();
        for (QuadTreeNode &child : m_childBlocks)
        {
            child.Build(picWidth, picHeight);
        }
    }

    //! \brief    Append the four quadrants of this block (raster order) as children.
    void CreateCUs()
    {
        const uint32_t half       = m_size / 2;
        const uint32_t childLevel = m_level + 1;

        // Bit 0 of the quadrant index selects the right half, bit 1 the bottom half.
        for (uint32_t quadrant = 0; quadrant < 4; ++quadrant)
        {
            const uint32_t childX = m_x + (quadrant & 1) * half;
            const uint32_t childY = m_y + (quadrant >> 1) * half;
            m_childBlocks.emplace_back(m_ctb, childX, childY, childLevel, m_ctbLog2Size);
        }
    }

    //! \brief    True when the block starts inside the w x h picture but extends past its
    //!           right edge or its bottom edge.
    bool DoesBlockCrossCodedPicture(uint32_t w, uint32_t h) const
    {
        const bool crossesRightEdge  = (m_x < w) && ((m_x + m_size) > w);
        const bool crossesBottomEdge = (m_y < h) && ((m_y + m_size) > h);
        return crossesRightEdge || crossesBottomEdge;
    }

public:
    const QuadTreeNode *      m_ctb         = nullptr;  // the root of CTB
    const uint32_t            m_x           = 0;        // top-left corner, horizontal
    const uint32_t            m_y           = 0;        // top-left corner, vertical
    const uint32_t            m_level       = 0;        // depth inside the CTB
    const uint32_t            m_size        = 0;        // block side length
    const uint32_t            m_ctbLog2Size = 0;        // log2 of the CTB side length
    std::vector<QuadTreeNode> m_childBlocks = {};       // empty for a leaf
};
5758 
5759 class QuadTree : public QuadTreeNode
5760 {
5761 public:
QuadTree(uint32_t x,uint32_t y,uint32_t ctbLog2Size)5762     QuadTree(uint32_t x, uint32_t y, uint32_t ctbLog2Size)
5763         : QuadTreeNode(this, x, y, 0, ctbLog2Size)
5764     {
5765     }
5766 
5767     // Build quadtree in the way none of the blocks crosses picture boundary
BuildQuadTree(uint32_t width,uint32_t height)5768     void BuildQuadTree(uint32_t width, uint32_t height)
5769     {
5770         m_picWidth  = width;
5771         m_picHeight = height;
5772 
5773         Build(width, height);
5774 
5775         CUs.reserve(64);
5776         FillCuList(*this, CUs);
5777     }
5778 
GetSplitFlags(const QuadTreeNode & blk,HcpPakObjectG12 & pakObj)5779     static void GetSplitFlags(const QuadTreeNode &blk, HcpPakObjectG12 &pakObj)
5780     {
5781         auto idx = [](uint32_t x0, uint32_t y0, uint32_t x, uint32_t y, uint32_t log2CbSize) {
5782             auto const nCbS = (1 << log2CbSize);
5783             return (x - x0) / nCbS + (y - y0) / nCbS * 2;
5784         };
5785 
5786         if (blk.m_childBlocks.empty())  // Block doesn't have splits
5787             return;
5788 
5789         switch (blk.m_level)
5790         {
5791         case 0:
5792             pakObj.DW1.Split_flag_level0 = 1;
5793             break;
5794 
5795         case 1:
5796         {
5797             auto const blkIdx = idx(blk.m_ctb->m_x, blk.m_ctb->m_y, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 1);
5798             pakObj.DW1.Split_flag_level1 |= 1 << blkIdx;
5799         }
5800         break;
5801 
5802         case 2:
5803         {
5804             auto const blkIdx1 = idx(blk.m_ctb->m_x, blk.m_ctb->m_y, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 1);
5805             auto const nCbS1   = (1 << (blk.m_ctbLog2Size - 1));
5806             auto const x1      = blk.m_ctb->m_x + nCbS1 * (blkIdx1 % 2);
5807             auto const y1      = blk.m_ctb->m_y + nCbS1 * (blkIdx1 / 2);
5808             auto const blkIdx2 = idx(x1, y1, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 2);
5809             switch (blkIdx1)
5810             {
5811             case 0:
5812                 pakObj.DW1.Split_flag_level2_level1part0 |= 1 << blkIdx2;
5813                 break;
5814             case 1:
5815                 pakObj.DW1.Split_flag_level2_level1part1 |= 1 << blkIdx2;
5816                 break;
5817             case 2:
5818                 pakObj.DW1.Split_flag_level2_level1part2 |= 1 << blkIdx2;
5819                 break;
5820             case 3:
5821                 pakObj.DW1.Split_flag_level2_level1part3 |= 1 << blkIdx2;
5822                 break;
5823             };
5824         }
5825         break;
5826         }
5827 
5828         for_each(blk.m_childBlocks.begin(), blk.m_childBlocks.end(), [&](const QuadTreeNode &blk) { GetSplitFlags(blk, pakObj); });
5829     }
5830 
5831 protected:
5832     // Prepare a list of CU inside a coded picure boundary
FillCuList(const QuadTreeNode & cu,std::vector<const QuadTreeNode * > & list)5833     void FillCuList(const QuadTreeNode &cu, std::vector<const QuadTreeNode *> &list)
5834     {
5835         if (cu.m_childBlocks.empty() && ((cu.m_x + cu.m_size) <= m_picWidth) && ((cu.m_y + cu.m_size) <= m_picHeight))
5836             list.push_back(&cu);
5837         else
5838             for_each(cu.m_childBlocks.begin(), cu.m_childBlocks.end(), [&](const QuadTreeNode &blk) { FillCuList(blk, list); });
5839     }
5840 
5841     uint32_t m_picWidth  = 0;
5842     uint32_t m_picHeight = 0;
5843 
5844 public:
5845     std::vector<const QuadTreeNode *> CUs = {};
5846 };
5847 
GenerateSkipFrameMbCodeSurface(SkipFrameInfo & skipframeInfo)5848 MOS_STATUS CodechalEncHevcStateG12::GenerateSkipFrameMbCodeSurface(SkipFrameInfo &skipframeInfo)
5849 {
5850     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5851     CODECHAL_ENCODE_FUNCTION_ENTER;
5852 
5853     MOS_LOCK_PARAMS lockFlags = {};
5854     lockFlags.WriteOnly       = 1;
5855     uint8_t *data             = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &skipframeInfo.m_resMbCodeSkipFrameSurface, &lockFlags);
5856     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
5857     MOS_ZeroMemory(data, m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE);
5858 
5859     auto pakObjData = (HcpPakObjectG12 *)data;
5860     auto cuData     = (EncodeHevcCuDataG12 *)(data + m_mvOffset);
5861 
5862     auto const ctbSize          = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5863     auto const maxNumCuInCtb    = (ctbSize / CODECHAL_HEVC_MIN_CU_SIZE) * (ctbSize / CODECHAL_HEVC_MIN_CU_SIZE);
5864     auto const picWidthInCtb    = MOS_ROUNDUP_DIVIDE(m_frameWidth, ctbSize);
5865     auto const picHeightInCtb   = MOS_ROUNDUP_DIVIDE(m_frameHeight, ctbSize);
5866     CODECHAL_ENCODE_CHK_COND_RETURN(picWidthInCtb <= 0, "Invalid m_frameWidth");
5867     CODECHAL_ENCODE_CHK_COND_RETURN(picHeightInCtb <= 0, "Invalid m_frameHeight");
5868     uint32_t   num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5869     uint32_t * tileColumnsStartPosition{new uint32_t[num_tile_columns]{}};
5870 
5871     for (uint32_t i = 0; i < (num_tile_columns); i++)
5872     {
5873         if (m_hevcPicParams->tile_column_width[i] == 0)
5874         {
5875             tileColumnsStartPosition[i] = picWidthInCtb;
5876             break;
5877         }
5878 
5879         if (i == 0)
5880         {
5881             tileColumnsStartPosition[i] = m_hevcPicParams->tile_column_width[i];
5882             continue;
5883         }
5884 
5885         tileColumnsStartPosition[i] = tileColumnsStartPosition[i - 1] + m_hevcPicParams->tile_column_width[i];
5886     }
5887 
5888     // Prepare CTB splits for corner cases:
5889     // Last column
5890     QuadTree lastColumnCtb((picWidthInCtb - 1) * ctbSize, 0, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5891     lastColumnCtb.BuildQuadTree(m_frameWidth, m_frameHeight);
5892 
5893     // Last row
5894     QuadTree lastRowCtb(0, (picHeightInCtb - 1) * ctbSize, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5895     lastRowCtb.BuildQuadTree(m_frameWidth, m_frameHeight);
5896 
5897     // Right bottom CTB
5898     QuadTree lastColRowCtb((picWidthInCtb - 1) * ctbSize, (picHeightInCtb - 1) * ctbSize, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5899     lastColRowCtb.BuildQuadTree(m_frameWidth, m_frameHeight);
5900 
5901     uint32_t sliceFirstCtbIdx;
5902     uint32_t ctbXAddr;
5903     uint32_t ctbYAddr;
5904     uint32_t nCUs;
5905     uint32_t tileEnd;
5906     uint32_t tileStart;
5907     for (uint32_t slcIdx = 0; slcIdx < m_numSlices; ++slcIdx)
5908     {
5909         sliceFirstCtbIdx = m_hevcSliceParams[slcIdx].slice_segment_address;
5910         tileEnd          = 0;
5911         tileStart        = 0;
5912         ctbXAddr         = sliceFirstCtbIdx % picWidthInCtb;
5913         ctbYAddr         = sliceFirstCtbIdx / picWidthInCtb;
5914         for (uint32_t i = 0; i < num_tile_columns; i++)
5915         {
5916             //Determine what tile slice belongs to
5917             if (ctbXAddr < tileColumnsStartPosition[i])
5918             {
5919                 tileEnd   = tileColumnsStartPosition[i];
5920                 tileStart = (i == 0) ? 0 : tileColumnsStartPosition[i - 1];
5921                 break;
5922             }
5923         }
5924 
5925         for (uint32_t ctbIdxInSlice = 0; ctbIdxInSlice < m_hevcSliceParams[slcIdx].NumLCUsInSlice; ++ctbIdxInSlice, ++pakObjData, ++ctbXAddr)
5926         {
5927             if (ctbXAddr >= tileEnd)
5928             {
5929                 ctbYAddr++;
5930                 ctbXAddr = tileStart;
5931             }
5932             pakObjData->DW0.Type                    = 0x03;
5933             pakObjData->DW0.Opcode                  = 0x27;
5934             pakObjData->DW0.SubOp                   = 0x21;
5935             pakObjData->DW0.DwordLength             = 0x3;
5936             pakObjData->DW2.Current_LCU_X_Addr      = ctbXAddr;
5937             pakObjData->DW2.Current_LCU_Y_Addr      = ctbYAddr;
5938             pakObjData->DW4.LCUForceZeroCoeff       = 1;  // Force skip CUs
5939             pakObjData->DW4.Disable_SAO_On_LCU_Flag = 1;
5940 
5941             const bool bCtbCrossRightPicBoundary       = (ctbXAddr + 1) * ctbSize > m_frameWidth;
5942             const bool bCtbCrossBottomPicBoundary      = (ctbYAddr + 1) * ctbSize > m_frameHeight;
5943             const bool bCtbCrossRightBottomPicBoundary = bCtbCrossRightPicBoundary && bCtbCrossBottomPicBoundary;
5944             if (bCtbCrossRightBottomPicBoundary)
5945             {
5946                 QuadTree::GetSplitFlags(lastColRowCtb, *pakObjData);
5947                 nCUs = lastColRowCtb.CUs.size();
5948             }
5949             else if (bCtbCrossRightPicBoundary)
5950             {
5951                 QuadTree::GetSplitFlags(lastColumnCtb, *pakObjData);
5952                 nCUs = lastColumnCtb.CUs.size();
5953             }
5954             else if (bCtbCrossBottomPicBoundary)
5955             {
5956                 QuadTree::GetSplitFlags(lastRowCtb, *pakObjData);
5957                 nCUs = lastRowCtb.CUs.size();
5958             }
5959             else  // default case
5960             {
5961                 nCUs = 1;
5962                 // For regular CTB, CU splits are not needed. All level values are zero
5963             }
5964             pakObjData->DW1.CU_count_minus1 = nCUs - 1;
5965 
5966             if (ctbIdxInSlice == (m_hevcSliceParams[slcIdx].NumLCUsInSlice - 1))
5967             {
5968                 pakObjData->DW1.LastCtbOfTileFlag = pakObjData->DW1.LastCtbOfSliceFlag = 1;
5969                 pakObjData->DW5                                                        = 0x05000000;  // Add batch buffer end flag
5970             }
5971 
5972             auto CeilLog2 = [](uint32_t x) {
5973                 auto l = 0;
5974                 while (x > (1U << l)) l++;
5975                 return l;
5976             };
5977 
5978             // Fill CU records
5979             for (unsigned int cuIdx = 0; cuIdx < nCUs; ++cuIdx, ++cuData)
5980             {
5981                 cuData->DW7_CuPredMode = 1;  // Inter
5982 
5983                 // Note that this can work only for B slices.
5984                 // If P slice support appears, we need to have the 2nd skipFrameMbCodeSurface
5985                 // When panic mode is triggered backwards reference only should be used
5986                 cuData->DW7_InterPredIdcMv0 = 0;
5987                 cuData->DW7_InterPredIdcMv1 = 0;
5988 
5989                 if (bCtbCrossRightBottomPicBoundary)
5990                 {
5991                     cuData->DW7_CuSize = CeilLog2(lastColRowCtb.CUs[cuIdx]->m_size) - 3;
5992                 }
5993                 else if (bCtbCrossRightPicBoundary)
5994                 {
5995                     cuData->DW7_CuSize = CeilLog2(lastColumnCtb.CUs[cuIdx]->m_size) - 3;
5996                 }
5997                 else if (bCtbCrossBottomPicBoundary)
5998                 {
5999                     cuData->DW7_CuSize = CeilLog2(lastRowCtb.CUs[cuIdx]->m_size) - 3;
6000                 }
6001                 else
6002                 {
6003                     cuData->DW7_CuSize = m_hevcSeqParams->log2_max_coding_block_size_minus3;
6004                 }
6005 
6006                 if (cuData->DW7_CuSize == 3)  // 64x64
6007                 {
6008                     cuData->DW5_TuSize        = 0xff;  // 4 TUs 32x32
6009                     cuData->DW6_TuCountMinus1 = 3;
6010                 }
6011                 else if (cuData->DW7_CuSize == 2)  // 32x32
6012                 {
6013                     cuData->DW5_TuSize = 3;  // 1 TU 32x32
6014                 }
6015                 else if (cuData->DW7_CuSize == 1)  // 16x16
6016                 {
6017                     cuData->DW5_TuSize = 2;  // 1 TU 16x16
6018                 }
6019                 else  // 8x8
6020                 {
6021                     cuData->DW5_TuSize = 1;  // 1 TU 8x8
6022                 }
6023             }
6024             cuData += (maxNumCuInCtb - nCUs);  // Shift to CUs of next CTB
6025 
6026 
6027         }
6028     }
6029     m_osInterface->pfnUnlockResource(m_osInterface, &skipframeInfo.m_resMbCodeSkipFrameSurface);
6030     delete[] tileColumnsStartPosition;
6031 
6032     skipframeInfo.numSlices = m_numSlices;
6033     uint32_t mbCodeSize     = m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE;
6034 
6035     #if USE_CODECHAL_DEBUG_TOOL
6036     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6037         &skipframeInfo.m_resMbCodeSkipFrameSurface,
6038         CodechalDbgAttr::attrInput,
6039         "SkipFrameSurface",
6040         mbCodeSize,
6041         0,
6042         CODECHAL_MEDIA_STATE_BRC_UPDATE));
6043     #endif
6044 
6045     return eStatus;
6046 }
6047 
GenerateConcurrentThreadGroupData(MOS_RESOURCE & concurrentThreadGroupData)6048 MOS_STATUS CodechalEncHevcStateG12::GenerateConcurrentThreadGroupData(MOS_RESOURCE &concurrentThreadGroupData)
6049 {
6050     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6051 
6052     CODECHAL_ENCODE_FUNCTION_ENTER;
6053 
6054     if (!Mos_ResourceIsNull(&concurrentThreadGroupData))
6055     {
6056         MOS_LOCK_PARAMS lockFlags;
6057         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6058         lockFlags.WriteOnly                            = 1;
6059         PCONCURRENT_THREAD_GROUP_DATA concurrentTgData = (PCONCURRENT_THREAD_GROUP_DATA)m_osInterface->pfnLockResource(
6060             m_osInterface,
6061             &concurrentThreadGroupData,
6062             &lockFlags);
6063         CODECHAL_ENCODE_CHK_NULL_RETURN(concurrentTgData);
6064 
6065         MOS_ZeroMemory(concurrentTgData, concurrentThreadGroupData.iSize);
6066 
6067         uint32_t shift    = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
6068         uint32_t residual = (1 << shift) - 1;
6069 
6070         uint32_t frameWidthInLCU  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
6071         uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
6072 
6073         uint32_t slcCount = 0;
6074         // Currently only using one thread group for each slice. Extend it to multiple soon.
6075         for (uint32_t startLcu = 0; slcCount < m_numSlices; slcCount++, startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice)
6076         {
6077             uint32_t sliceStartLcu  = m_hevcSliceParams[slcCount].slice_segment_address;
6078             uint32_t sliceStartLcux = sliceStartLcu % frameWidthInLCU;
6079             uint32_t sliceStartLcuy = sliceStartLcu / frameWidthInLCU;
6080 
6081             uint32_t sliceEndLcu  = (uint16_t)(startLcu + m_hevcSliceParams[slcCount].NumLCUsInSlice);  // this should be next slice start index
6082             uint32_t sliceEndLcux = sliceStartLcu % frameWidthInLCU;
6083             uint32_t sliceEndLcuy = sliceStartLcu / frameWidthInLCU;
6084 
6085             concurrentTgData->CurrSliceStartLcuX = (uint16_t)sliceStartLcux;
6086             concurrentTgData->CurrSliceStartLcuY = (uint16_t)sliceStartLcuy;
6087 
6088             concurrentTgData->CurrSliceEndLcuX = (uint16_t)sliceEndLcux;
6089             concurrentTgData->CurrSliceEndLcuY = (uint16_t)sliceEndLcuy;
6090 
6091             concurrentTgData->CurrTgStartLcuX = (uint16_t)sliceStartLcux;
6092             concurrentTgData->CurrTgStartLcuY = (uint16_t)sliceStartLcuy;
6093 
6094             concurrentTgData->CurrTgEndLcuX = (uint16_t)sliceEndLcux;
6095             concurrentTgData->CurrTgEndLcuY = (uint16_t)sliceEndLcuy;
6096         }
6097 
6098         m_osInterface->pfnUnlockResource(
6099             m_osInterface,
6100             &concurrentThreadGroupData);
6101     }
6102     else
6103     {
6104         CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
6105         return MOS_STATUS_NULL_POINTER;
6106     }
6107 
6108     return eStatus;
6109 }
6110 
EncodeMbEncKernel(CODECHAL_MEDIA_STATE_TYPE encFunctionType)6111 MOS_STATUS CodechalEncHevcStateG12::EncodeMbEncKernel(
6112     CODECHAL_MEDIA_STATE_TYPE encFunctionType)
6113 {
6114     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6115 
6116     PerfTagSetting perfTag;
6117     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
6118 
6119     // Initialize DSH kernel state
6120     PMHW_KERNEL_STATE            kernelState;
6121     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6122     CODECHAL_WALKER_DEGREE       walkerDegree;
6123     MHW_WALKER_PARAMS            walkerParams;
6124     uint32_t                     walkerResolutionX, walkerResolutionY;
6125     bool                         customDispatchPattern = true;
6126     uint16_t                     totalThreadNumPerLcu  = 1;
6127 
6128     if (m_hevcPicParams->CodingType == I_TYPE)
6129     {
6130         encFunctionType = CODECHAL_MEDIA_STATE_HEVC_I_MBENC;
6131     }
6132     else
6133     {
6134         encFunctionType = m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
6135     }
6136 
6137     if (m_isMaxLcu64)
6138     {
6139         kernelState = &m_mbEncKernelStates[MBENC_LCU64_KRNIDX];
6140         if (m_hevcSeqParams->TargetUsage == 1)
6141         {
6142             walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
6143             walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
6144         }
6145         else
6146         {
6147             walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6);
6148             walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6);
6149         }
6150     }
6151     else
6152     {
6153         kernelState       = &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
6154         walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
6155         walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
6156     }
6157 
6158     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6159     walkerCodecParams.WalkerMode             = m_walkerMode;
6160     walkerCodecParams.dwResolutionX          = walkerResolutionX;
6161     walkerCodecParams.dwResolutionY          = walkerResolutionY;
6162     walkerCodecParams.dwNumSlices            = m_numSlices;
6163     walkerCodecParams.usTotalThreadNumPerLcu = totalThreadNumPerLcu;
6164     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCustomDispatchPattern(&walkerParams, &walkerCodecParams));
6165 
6166     // If Single Task Phase is not enabled, use BT count for the kernel state.
6167     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6168     {
6169         uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6170         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6171             m_stateHeapInterface,
6172             maxBtCount));
6173         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6174         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6175     }
6176 
6177     // Set up the DSH/SSH as normal
6178     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6179         m_stateHeapInterface,
6180         kernelState,
6181         false,
6182         0,
6183         false,
6184         m_storeData));
6185 
6186     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6187     MOS_ZeroMemory(&idParams, sizeof(idParams));
6188     idParams.pKernelState = kernelState;
6189     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6190         m_stateHeapInterface,
6191         1,
6192         &idParams));
6193 
6194     // Generate Lcu Level Data
6195     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx]));
6196 
6197     // Generate Concurrent Thread Group Data
6198     if (m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26Degree ||
6199         m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26ZDegree ||
6200         m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDegree)
6201     {
6202         // Generate Concurrent Thread Group Data
6203         uint32_t curIdx = m_currRecycledBufIdx;
6204 
6205         CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData(m_encBCombinedBuffer1[curIdx].sResource));
6206     }
6207     else
6208     {
6209         // For 45D walking patter, kernel generates the concurrent thread group by itself. No need for driver to generate.
6210     }
6211 
6212     // setup curbe
6213     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncBKernel());
6214 
6215     CODECHAL_DEBUG_TOOL(
6216         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6217             encFunctionType,
6218             MHW_DSH_TYPE,
6219             kernelState));
6220 
6221         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6222             encFunctionType,
6223             kernelState));
6224         //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHEVCMbEncCurbeG12(
6225         //m_debugInterface,
6226         //encFunctionType,
6227         //&m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource));
6228 
6229         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6230             encFunctionType,
6231             MHW_ISH_TYPE,
6232             kernelState));)
6233 
6234     MOS_COMMAND_BUFFER cmdBuffer;
6235     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6236 
6237     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
6238     sendKernelCmdsParams.EncFunctionType      = encFunctionType;
6239     sendKernelCmdsParams.pKernelState         = kernelState;
6240     // TO DO : Remove scoreboard from VFE STATE Command
6241     sendKernelCmdsParams.bEnableCustomScoreBoard = false;
6242     sendKernelCmdsParams.pCustomScoreBoard       = nullptr;
6243     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6244 
6245     // Add binding table
6246     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6247         m_stateHeapInterface,
6248         kernelState));
6249 
6250     // send surfaces
6251     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesBKernel(&cmdBuffer));
6252 
6253     CODECHAL_DEBUG_TOOL(
6254         if (m_pictureCodingType == I_TYPE) {
6255             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6256                 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
6257                 CodechalDbgAttr::attrOutput,
6258                 "HEVC_I_MBENC_LcuLevelData_In",
6259                 CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
6260         } else {
6261             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6262                 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
6263                 CodechalDbgAttr::attrOutput,
6264                 "HEVC_B_MBENC_LcuLevelData_In",
6265                 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
6266         })
6267 
6268     if ((encFunctionType == CODECHAL_MEDIA_STATE_HEVC_B_MBENC) || (encFunctionType == CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC))
6269     {
6270         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6271             &m_encConstantTableForB.sResource,
6272             "HEVC_B_MBENC_ConstantData_In",
6273             CodechalDbgAttr::attrOutput,
6274             m_encConstantTableForB.dwSize,
6275             0,
6276             encFunctionType)));
6277     }
6278 
6279     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
6280         &cmdBuffer,
6281         &walkerParams));
6282 
6283     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6284 
6285     // Add dump for MBEnc surface state heap here
6286     CODECHAL_DEBUG_TOOL(
6287         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6288             encFunctionType,
6289             MHW_SSH_TYPE,
6290             kernelState));)
6291 
6292     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6293         m_stateHeapInterface,
6294         kernelState));
6295 
6296     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6297     {
6298         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6299             m_stateHeapInterface));
6300         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6301             &cmdBuffer,
6302             nullptr));
6303     }
6304 
6305     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6306         &cmdBuffer,
6307         encFunctionType,
6308         nullptr)));
6309 
6310     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6311 
6312     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6313 
6314     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6315     {
6316         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6317         m_lastTaskInPhase = false;
6318     }
6319 
6320     CODECHAL_DEBUG_TOOL(
6321         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6322             &m_debugSurface[0].sResource,
6323             CodechalDbgAttr::attrOutput,
6324             "DebugDataSurface_Out0",
6325             m_debugSurface[0].dwSize,
6326             0,
6327             encFunctionType));
6328         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6329             &m_debugSurface[1].sResource,
6330             CodechalDbgAttr::attrOutput,
6331             "DebugDataSurface_Out1",
6332             m_debugSurface[1].dwSize,
6333             0,
6334             encFunctionType));
6335         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6336             &m_debugSurface[2].sResource,
6337             CodechalDbgAttr::attrOutput,
6338             "DebugDataSurface_Out2",
6339             m_debugSurface[2].dwSize,
6340             0,
6341             encFunctionType));
6342         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6343             &m_debugSurface[3].sResource,
6344             CodechalDbgAttr::attrOutput,
6345             "DebugDataSurface_Out3",
6346             m_debugSurface[3].dwSize,
6347             0,
6348             encFunctionType)););
6349 
6350 #if 0  // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them.
6351     {
6352         CODECHAL_DEBUG_TOOL(
6353             CODEC_REF_LIST      currRefList;
6354 
6355         currRefList = *(pRefList[m_currReconstructedPic.FrameIdx]);
6356         currRefList.RefPic = m_currOriginalPic;
6357 
6358         m_debugInterface->CurrPic = m_currOriginalPic;
6359         m_debugInterface->dwBufferDumpFrameNum = m_storeData;
6360         m_debugInterface->wFrameType = m_pictureCodingType;
6361 
6362         //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeMbEncMbPakOutput(
6363         //    m_debugInterface,
6364         //    this,
6365         //    &currRefList,
6366         //    (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6367         //    CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6368         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6369             &currRefList.resRefMbCodeBuffer,
6370             CodechalDbgAttr::attrOutput,
6371             "MbCode",
6372             m_picWidthInMb * m_frameFieldHeightInMb*64,
6373             CodecHal_PictureIsBottomField(currRefList.RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
6374             (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6375             CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6376 
6377         if (m_mvDataSize)
6378         {
6379             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6380                 &currRefList.resRefMvDataBuffer,
6381                 CodechalDbgAttr::attrOutput,
6382                 "MbData",
6383                 m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
6384                 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
6385                 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6386                 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6387         }
6388         if (CodecHalIsFeiEncode(m_codecFunction))
6389         {
6390             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6391                 &m_resDistortionBuffer,
6392                 CodechalDbgAttr::attrOutput,
6393                 "DistortionSurf",
6394                 m_picWidthInMb * m_frameFieldHeightInMb * 48,
6395                 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
6396                 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6397                 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6398         }
6399 
6400         )
6401 
6402             CODECHAL_DEBUG_TOOL(
6403                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6404                     this,
6405                     &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
6406                     m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
6407                     (const char*)"_Hevc_CombinedBuffer2",
6408                     false));
6409         );
6410 
6411         // Dump SW scoreboard surface - Output of MBENC
6412         CODECHAL_DEBUG_TOOL(
6413             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHevcEncodeSwScoreboardSurface(
6414                 m_debugInterface,
6415                 m_swScoreboardState->GetCurSwScoreboardSurface(), false));
6416         );
6417 
6418         CODECHAL_DEBUG_TOOL(
6419             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6420                 this,
6421                 &m_encConstantTableForB.sResource,
6422                 m_encConstantTableForB.dwSize,
6423                 (const char*)"_Hevc_EncConstantTable",
6424                 true));
6425         );
6426 
6427         CODECHAL_DEBUG_TOOL(
6428             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6429                 this,
6430                 &m_debugSurface[0].sResource,
6431                 m_debugSurface[0].dwSize,
6432                 (const char*)"_Hevc_DebugDump0",
6433                 true));
6434         );
6435 
6436         CODECHAL_DEBUG_TOOL(
6437             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6438                 this,
6439                 &m_debugSurface[1].sResource,
6440                 m_debugSurface[1].dwSize,
6441                 (const char*)"_Hevc_DebugDump1",
6442                 true));
6443         );
6444 
6445         CODECHAL_DEBUG_TOOL(
6446             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6447                 this,
6448                 &m_debugSurface[2].sResource,
6449                 m_debugSurface[2].dwSize,
6450                 (const char*)"_Hevc_DebugDump2",
6451                 true));
6452         );
6453 
6454         CODECHAL_DEBUG_TOOL(
6455             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6456                 this,
6457                 &m_debugSurface[3].sResource,
6458                 m_debugSurface[3].dwSize,
6459                 (const char*)"_Hevc_DebugDump3",
6460                 true));
6461         );
6462 
6463         CODECHAL_DEBUG_TOOL(
6464             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6465                 &m_currPicWithReconBoundaryPix,
6466                 CodechalDbgAttr::attrReconstructedSurface,
6467                 "ReconSurf")));
6468     }
6469 #endif
6470 
6471     return eStatus;
6472 }
6473 
EncodeBrcInitResetKernel()6474 MOS_STATUS CodechalEncHevcStateG12::EncodeBrcInitResetKernel()
6475 {
6476     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6477 
6478     CODECHAL_ENCODE_FUNCTION_ENTER;
6479 
6480     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
6481 
6482     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET;
6483 
6484     // Initialize DSH kernel state
6485     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
6486 
6487     // If Single Task Phase is not enabled, use BT count for the kernel state.
6488     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6489     {
6490         uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6491         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6492             m_stateHeapInterface,
6493             maxBtCount));
6494         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6495         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6496     }
6497 
6498     // Set up the DSH/SSH as normal
6499     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6500         m_stateHeapInterface,
6501         kernelState,
6502         false,
6503         0,
6504         false,
6505         m_storeData));
6506 
6507     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6508     MOS_ZeroMemory(&idParams, sizeof(idParams));
6509     idParams.pKernelState = kernelState;
6510     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6511         m_stateHeapInterface,
6512         1,
6513         &idParams));
6514 
6515     // Setup curbe for BrcInitReset kernel
6516     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcInitReset(
6517         brcKrnIdx));
6518 
6519     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;
6520     CODECHAL_DEBUG_TOOL(
6521         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6522             encFunctionType,
6523             MHW_DSH_TYPE,
6524             kernelState));
6525         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6526             encFunctionType,
6527             kernelState));
6528         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6529             encFunctionType,
6530             MHW_ISH_TYPE,
6531             kernelState));)
6532 
6533     MOS_COMMAND_BUFFER cmdBuffer;
6534     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6535 
6536     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
6537     sendKernelCmdsParams.EncFunctionType      = encFunctionType;
6538     sendKernelCmdsParams.pKernelState         = kernelState;
6539     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6540 
6541     // Add binding table
6542     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6543         m_stateHeapInterface,
6544         kernelState));
6545 
6546     // Send surfaces for BrcInitReset Kernel
6547     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcInitResetSurfaces(&cmdBuffer, brcKrnIdx));
6548 
6549     MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
6550     MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
6551 
6552     MediaObjectInlineData mediaObjectInlineData;
6553     MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
6554     mediaObjectParams.pInlineData      = &mediaObjectInlineData;
6555     mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
6556     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
6557         &cmdBuffer,
6558         nullptr,
6559         &mediaObjectParams));
6560 
6561     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6562 
6563     // Add dump for BrcInitReset surface state heap here
6564     CODECHAL_DEBUG_TOOL(
6565         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6566             encFunctionType,
6567             MHW_SSH_TYPE,
6568             kernelState));)
6569 
6570     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6571         m_stateHeapInterface,
6572         kernelState));
6573 
6574     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6575     {
6576         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6577             m_stateHeapInterface));
6578         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6579             &cmdBuffer,
6580             nullptr));
6581     }
6582 
6583     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6584         &cmdBuffer,
6585         encFunctionType,
6586         nullptr)));
6587 
6588     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6589 
6590     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6591 
6592     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6593     {
6594         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6595         m_lastTaskInPhase = false;
6596     }
6597 
6598     return eStatus;
6599 }
6600 
EncodeBrcFrameUpdateKernel()6601 MOS_STATUS CodechalEncHevcStateG12::EncodeBrcFrameUpdateKernel()
6602 {
6603     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6604 
6605     CODECHAL_ENCODE_FUNCTION_ENTER;
6606 
6607     PerfTagSetting perfTag;
6608     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
6609 
6610     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
6611 
6612     // Initialize DSH kernel state
6613     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
6614 
6615     // If Single Task Phase is not enabled, use BT count for the kernel state.
6616     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6617     {
6618         uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6619         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6620             m_stateHeapInterface,
6621             maxBtCount));
6622         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6623         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6624     }
6625 
6626     // Set up the DSH/SSH as normal
6627     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6628         m_stateHeapInterface,
6629         kernelState,
6630         false,
6631         0,
6632         false,
6633         m_storeData));
6634 
6635     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6636     MOS_ZeroMemory(&idParams, sizeof(idParams));
6637     idParams.pKernelState = kernelState;
6638     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6639         m_stateHeapInterface,
6640         1,
6641         &idParams));
6642 
6643     // Setup curbe for BrcFrameUpdate kernel
6644     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
6645         brcKrnIdx));
6646 
6647     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
6648     CODECHAL_DEBUG_TOOL(
6649         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6650             encFunctionType,
6651             MHW_DSH_TYPE,
6652             kernelState));
6653         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6654             encFunctionType,
6655             kernelState));
6656         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6657             encFunctionType,
6658             MHW_ISH_TYPE,
6659             kernelState));)
6660 
6661     MOS_COMMAND_BUFFER cmdBuffer;
6662     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6663 
6664     SendKernelCmdsParams sendKernelCmdsParams;
6665     sendKernelCmdsParams                 = SendKernelCmdsParams();
6666     sendKernelCmdsParams.EncFunctionType = encFunctionType;
6667     sendKernelCmdsParams.pKernelState    = kernelState;
6668     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6669 
6670     // Add binding table
6671     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6672         m_stateHeapInterface,
6673         kernelState));
6674 
6675     // Send surfaces for BrcFrameUpdate Kernel
6676     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcFrameUpdateSurfaces(&cmdBuffer));
6677 
6678     MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
6679     MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
6680 
6681     MediaObjectInlineData mediaObjectInlineData;
6682     MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
6683     mediaObjectParams.pInlineData      = &mediaObjectInlineData;
6684     mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
6685     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
6686         &cmdBuffer,
6687         nullptr,
6688         &mediaObjectParams));
6689 
6690     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6691 
6692     // Add dump for BrcFrameUpdate surface state heap here
6693     CODECHAL_DEBUG_TOOL(
6694         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6695             encFunctionType,
6696             MHW_SSH_TYPE,
6697             kernelState));)
6698     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6699         m_stateHeapInterface,
6700         kernelState));
6701 
6702     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6703     {
6704         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6705             m_stateHeapInterface));
6706         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6707             &cmdBuffer,
6708             nullptr));
6709     }
6710 
6711     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6712         &cmdBuffer,
6713         encFunctionType,
6714         nullptr)));
6715 
6716     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6717 
6718     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6719 
6720     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6721     {
6722         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6723         m_lastTaskInPhase = false;
6724     }
6725 
6726     return eStatus;
6727 }
6728 
EncodeBrcLcuUpdateKernel()6729 MOS_STATUS CodechalEncHevcStateG12::EncodeBrcLcuUpdateKernel()
6730 {
6731     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6732 
6733     CODECHAL_ENCODE_FUNCTION_ENTER;
6734 
6735     PerfTagSetting perfTag;
6736     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);
6737 
6738     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE;
6739 
6740     // Initialize DSH kernel state
6741     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
6742 
6743     // If Single Task Phase is not enabled, use BT count for the kernel state.
6744     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6745     {
6746         uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6747         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6748             m_stateHeapInterface,
6749             maxBtCount));
6750         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6751         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6752     }
6753 
6754     // Set up the DSH/SSH as normal
6755     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6756         m_stateHeapInterface,
6757         kernelState,
6758         false,
6759         0,
6760         false,
6761         m_storeData));
6762 
6763     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6764     MOS_ZeroMemory(&idParams, sizeof(idParams));
6765     idParams.pKernelState = kernelState;
6766     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6767         m_stateHeapInterface,
6768         1,
6769         &idParams));
6770 
6771     // Setup curbe for BrcFrameUpdate kernel
6772     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
6773         brcKrnIdx));
6774 
6775     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_MB_BRC_UPDATE;
6776     CODECHAL_DEBUG_TOOL(
6777         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6778             encFunctionType,
6779             MHW_DSH_TYPE,
6780             kernelState));
6781         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6782             encFunctionType,
6783             kernelState));
6784         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6785             encFunctionType,
6786             MHW_ISH_TYPE,
6787             kernelState));)
6788 
6789     MOS_COMMAND_BUFFER cmdBuffer;
6790     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6791 
6792     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
6793     sendKernelCmdsParams.EncFunctionType      = encFunctionType;
6794     sendKernelCmdsParams.pKernelState         = kernelState;
6795     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6796 
6797     // Add binding table
6798     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6799         m_stateHeapInterface,
6800         kernelState));
6801 
6802     if (m_hevcPicParams->NumROI)
6803     {
6804         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROISurface());
6805     }
6806 
6807     // Send surfaces for BrcFrameUpdate Kernel
6808     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcLcuUpdateSurfaces(&cmdBuffer));
6809 
6810     // Program Media walker
6811     uint32_t resolutionX, resolutionY;
6812     resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
6813     resolutionX = MOS_ROUNDUP_SHIFT(resolutionX, 4);
6814     resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight);
6815     resolutionY = MOS_ROUNDUP_SHIFT(resolutionY, 3);
6816     CODECHAL_ENCODE_ASSERTMESSAGE("LucBRC thread space = %d x %d", resolutionX, resolutionY);
6817 
6818     MHW_WALKER_PARAMS walkerParams;
6819     MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
6820 
6821     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6822     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6823     walkerCodecParams.WalkerMode              = m_walkerMode;
6824     walkerCodecParams.dwResolutionX           = resolutionX;
6825     walkerCodecParams.dwResolutionY           = resolutionY;
6826     walkerCodecParams.bNoDependency           = true;
6827     walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
6828     walkerCodecParams.ucGroupId               = m_groupId;
6829     walkerCodecParams.wPictureCodingType      = m_pictureCodingType;
6830     walkerCodecParams.bUseScoreboard          = false;
6831 
6832     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
6833         m_hwInterface,
6834         &walkerParams,
6835         &walkerCodecParams));
6836 
6837     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
6838         &cmdBuffer,
6839         &walkerParams));
6840 
6841     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6842 
6843     // Add dump for BrcFrameUpdate surface state heap here
6844     CODECHAL_DEBUG_TOOL(
6845         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6846             encFunctionType,
6847             MHW_SSH_TYPE,
6848             kernelState));)
6849 
6850     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6851         m_stateHeapInterface,
6852         kernelState));
6853 
6854     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6855     {
6856         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6857             m_stateHeapInterface));
6858         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6859             &cmdBuffer,
6860             nullptr));
6861     }
6862 
6863     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6864         &cmdBuffer,
6865         encFunctionType,
6866         nullptr)));
6867 
6868     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6869 
6870     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6871 
6872     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6873     {
6874         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6875         m_lastTaskInPhase = false;
6876     }
6877 
6878     return eStatus;
6879 }
6880 
EncodeKernelFunctions()6881 MOS_STATUS CodechalEncHevcStateG12::EncodeKernelFunctions()
6882 {
6883     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6884 
6885     if (m_pakOnlyTest)
6886     {
6887         // Skip ENC when PAK only mode is enabled
6888         return eStatus;
6889     }
6890 
6891     if (m_pictureCodingType == P_TYPE)
6892     {
6893         m_lowDelay = true;
6894     }
6895 
6896     if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
6897     {
6898         m_currRefSync = &m_refSync[m_currMbCodeIdx];
6899 
6900         // Check if the signal obj has been used before
6901         if (!m_hevcSeqParams->ParallelBRC && (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed))
6902         {
6903             MOS_SYNC_PARAMS syncParams  = g_cInitSyncParams;
6904             syncParams.GpuContext       = m_renderContext;
6905             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
6906             syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount;
6907 
6908             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
6909             m_currRefSync->uiSemaphoreObjCount = 0;
6910             m_currRefSync->bInUsed             = false;
6911         }
6912     }
6913     else
6914     {
6915         m_currRefSync = nullptr;
6916     }
6917 
6918     //Reset to use a different performance tag ID
6919     m_osInterface->pfnResetPerfBufferID(m_osInterface);
6920 
6921     m_firstTaskInPhase = true;
6922     m_lastTaskInPhase  = false;
6923 
6924     m_brcInputForEncKernelBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx];
6925 
6926     // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
6927     // BRC init is called once even for CQP mode when ROI is enabled, hence also checking for first frame flag
6928     if ((m_brcEnabled && (m_brcInit || m_brcReset)) || (m_firstFrame && m_hevcPicParams->NumROI))
6929     {
6930         m_firstTaskInPhase = m_lastTaskInPhase = true;
6931         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcInitResetKernel());
6932         m_brcInit = m_brcReset = false;
6933     }
6934 
6935     m_firstTaskInPhase = true;
6936     m_lastTaskInPhase  = false;
6937 
6938     CodechalEncodeSwScoreboard::KernelParams swScoreboardKernelParames;
6939     MOS_ZeroMemory(&swScoreboardKernelParames, sizeof(swScoreboardKernelParames));
6940 
6941     InitSwScoreBoardParams(swScoreboardKernelParames);
6942 
6943     if (m_useSwInitScoreboard)
6944     {
6945         SetupSwScoreBoard(&swScoreboardKernelParames);
6946     }
6947     else
6948     {
6949         // Call SW scoreboard Init kernel used by MBEnc kernel
6950         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->Execute(&swScoreboardKernelParames));
6951     }
6952 
6953     // Dump SW scoreboard surface - Output of SW scoreboard Init Kernel and Input to MBENC
6954     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6955         m_swScoreboardState->GetCurSwScoreboardSurface(),
6956         CodechalDbgAttr::attrInput,
6957         "InitSWScoreboard_In",
6958         CODECHAL_MEDIA_STATE_SW_SCOREBOARD_INIT)));
6959 
6960     // Csc, Downscaling, and/or 10-bit to 8-bit conversion
6961     CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState);
6962 
6963     CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
6964     MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
6965     cscScalingKernelParams.bLastTaskInPhaseCSC =
6966         cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
6967     cscScalingKernelParams.bLastTaskInPhase16xDS    = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
6968     cscScalingKernelParams.bLastTaskInPhase32xDS    = !(m_hmeEnabled || m_brcEnabled);
6969 
6970     CodechalEncodeCscDsG12::HevcExtKernelParams hevcExtCscParams;
6971     MOS_ZeroMemory(&hevcExtCscParams, sizeof(hevcExtCscParams));
6972 
6973     if (m_isMaxLcu64)
6974     {
6975         hevcExtCscParams.bHevcEncHistorySum            = true;
6976         hevcExtCscParams.bUseLCU32                     = false;
6977         hevcExtCscParams.presHistoryBuffer             = &m_encBCombinedBuffer2[m_lastRecycledBufIdx].sResource;
6978         hevcExtCscParams.dwSizeHistoryBuffer           = m_historyOutBufferSize;
6979         hevcExtCscParams.dwOffsetHistoryBuffer         = m_historyOutBufferOffset;
6980         hevcExtCscParams.presHistorySumBuffer          = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6981         hevcExtCscParams.dwSizeHistorySumBuffer        = sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer);
6982         hevcExtCscParams.dwOffsetHistorySumBuffer      = sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer);
6983         hevcExtCscParams.presMultiThreadTaskBuffer     = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6984         hevcExtCscParams.dwSizeMultiThreadTaskBuffer   = m_threadTaskBufferSize;
6985         hevcExtCscParams.dwOffsetMultiThreadTaskBuffer = m_threadTaskBufferOffset;
6986         cscScalingKernelParams.hevcExtParams           = &hevcExtCscParams;
6987     }
6988     else
6989     {
6990         cscScalingKernelParams.hevcExtParams = nullptr;  // LCU32 does not require history buffers
6991     }
6992 
6993     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
6994 
6995     if (m_hmeEnabled)
6996     {
6997         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel());
6998     }
6999     else if (m_brcEnabled && m_hevcPicParams->CodingType == I_TYPE)
7000     {
7001         m_lastTaskInPhase = true;
7002         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeIntraDistKernel());
7003     }
7004 
7005     // BRC + MbEnc in second task phase
7006     m_firstTaskInPhase = true;
7007     m_lastTaskInPhase  = false;
7008 
7009     // Wait for PAK if necessary
7010     CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
7011 
7012     // ROI uses the BRC LCU update kernel, even in CQP.  So we will call it
7013     if (m_hevcPicParams->NumROI && !m_brcEnabled)
7014     {
7015         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcLcuUpdateKernel());
7016         m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
7017 
7018         CODECHAL_DEBUG_TOOL(
7019             if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
7020                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7021                     &m_brcBuffers.sBrcMbQpBuffer.OsResource,
7022                     CodechalDbgAttr::attrOutput,
7023                     "MbQp",
7024                     m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
7025                     m_brcBuffers.dwBrcMbQpBottomFieldOffset,
7026                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
7027             } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcDistortion->OsResource,
7028                 CodechalDbgAttr::attrInput,
7029                 "BrcDist_AfterLcuBrc",
7030                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7031                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7032                 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7033     }
7034 
7035     if (m_brcEnabled)
7036     {
7037         m_hevcBrcG12->m_brcNumPakPasses = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses();
7038         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcFrameUpdateKernel());
7039 
7040         CODECHAL_DEBUG_TOOL(
7041             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7042                 &m_brcDistortion->OsResource,
7043                 CodechalDbgAttr::attrInput,
7044                 "BrcDist_AfterFrameBrc",
7045                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7046                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7047                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7048             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7049                 &m_brcBuffers.resBrcHistoryBuffer,
7050                 CodechalDbgAttr::attrOutput,
7051                 "HistoryWrite",
7052                 m_brcHistoryBufferSize,
7053                 0,
7054                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7055             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7056                 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
7057                 CodechalDbgAttr::attrOutput,
7058                 "ImgStateWrite",
7059                 BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
7060                 0,
7061                 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7062 
7063         CODECHAL_DEBUG_TOOL(
7064             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7065                 &m_brcDistortion->OsResource,
7066                 CodechalDbgAttr::attrInput,
7067                 "BrcDist_AfterFrameBrcUpdate",
7068                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7069                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7070                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7071             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7072                 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
7073                 CodechalDbgAttr::attrOutput,
7074                 "ImgStateWrite",
7075                 BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
7076                 0,
7077                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7078             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7079                 &m_brcBuffers.resBrcHistoryBuffer,
7080                 CodechalDbgAttr::attrOutput,
7081                 "HistoryWrite",
7082                 m_brcHistoryBufferSize,
7083                 0,
7084                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7085             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7086                 &m_brcBuffers.sBrcIntraDistortionBuffer.OsResource,
7087                 CodechalDbgAttr::attrOutput,
7088                 "Idistortion",
7089                 m_brcBuffers.sBrcIntraDistortionBuffer.dwWidth * m_brcBuffers.sBrcIntraDistortionBuffer.dwHeight,
7090                 0,
7091                 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7092 
7093         if (m_lcuBrcEnabled || m_hevcPicParams->NumROI)
7094         {
7095             // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
7096             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcLcuUpdateKernel());
7097             m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
7098         }
7099         else
7100         {
7101             m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
7102         }
7103 
7104         CODECHAL_DEBUG_TOOL(
7105             if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
7106                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7107                     &m_brcBuffers.sBrcMbQpBuffer.OsResource,
7108                     CodechalDbgAttr::attrOutput,
7109                     "MbQp",
7110                     m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
7111                     m_brcBuffers.dwBrcMbQpBottomFieldOffset,
7112                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
7113             } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcDistortion->OsResource,
7114                 CodechalDbgAttr::attrInput,
7115                 "BrcDist_AfterLcuBrcUpdate",
7116                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7117                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7118                 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7119     }
7120 
7121     m_useWeightedSurfaceForL0 = false;
7122     m_useWeightedSurfaceForL1 = false;
7123 
7124     //currently only support same weightoffset for all slices, and only support Luma weighted prediction
7125     auto slicetype = m_hevcSliceParams->slice_type;
7126     if (m_weightedPredictionSupported && !m_feiEnable &&
7127         ((slicetype == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
7128             (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)))
7129     {
7130         uint32_t                      LumaWeightFlag[2] = {0};  //[L0, L1]
7131         CodechalEncodeWP::SliceParams sliceWPParams;
7132         MOS_FillMemory((void *)&sliceWPParams, sizeof(sliceWPParams), 0);
7133 
7134         //populate the slice WP parameter structure
7135         sliceWPParams.luma_log2_weight_denom = m_hevcSliceParams->luma_log2_weight_denom;  // luma weidht denom
7136         for (auto i = 0; i < 2; i++)
7137         {
7138             for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
7139             {
7140                 sliceWPParams.weights[i][j][0][0] = (1 << m_hevcSliceParams->luma_log2_weight_denom) +
7141                                                     m_hevcSliceParams->delta_luma_weight[i][j];  //Luma weight
7142                 sliceWPParams.weights[i][j][0][1] = m_hevcSliceParams->luma_offset[i][j];        //Luma offset
7143 
7144                 if (m_hevcSliceParams->delta_luma_weight[i][j] || m_hevcSliceParams->luma_offset[i][j])
7145                 {
7146                     LumaWeightFlag[i] |= (1 << j);
7147                 }
7148             }
7149         }
7150 
7151         CodechalEncodeWP::KernelParams wpKernelParams;
7152         MOS_FillMemory((void *)&wpKernelParams, sizeof(wpKernelParams), 0);
7153         wpKernelParams.useWeightedSurfaceForL0 = &m_useWeightedSurfaceForL0;
7154         wpKernelParams.useWeightedSurfaceForL1 = &m_useWeightedSurfaceForL1;
7155         wpKernelParams.slcWPParams             = &sliceWPParams;
7156 
7157         // Weighted Prediction to be applied for L0
7158         for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1); i++)
7159         {
7160             if ((LumaWeightFlag[LIST_0] & (1 << i)) && (i < CODEC_MAX_FORWARD_WP_FRAME))
7161             {
7162                 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][i];
7163                 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
7164                 {
7165                     MOS_SURFACE refFrameInput;
7166                     uint8_t     frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
7167                     refFrameInput          = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
7168 
7169                     //Weighted Prediction for ith forward reference frame
7170                     wpKernelParams.useRefPicList1 = false;
7171                     wpKernelParams.wpIndex        = i;
7172                     wpKernelParams.refFrameInput  = &refFrameInput;
7173                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
7174                 }
7175             }
7176         }
7177 
7178         // Weighted Predition to be applied for L1
7179         if (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)
7180         {
7181             for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1); i++)
7182             {
7183                 if ((LumaWeightFlag[LIST_1] & (1 << i)) && (i < CODEC_MAX_BACKWARD_WP_FRAME))
7184                 {
7185                     CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][i];
7186                     if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
7187                     {
7188                         MOS_SURFACE refFrameInput;
7189                         uint8_t     frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
7190                         refFrameInput          = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
7191 
7192                         //Weighted Prediction for ith backward reference frame
7193                         wpKernelParams.useRefPicList1 = true;
7194                         wpKernelParams.wpIndex        = i;
7195                         wpKernelParams.refFrameInput  = &refFrameInput;
7196                         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
7197                     }
7198                 }
7199             }
7200         }
7201     }
7202 
7203     // Reset to use a different performance tag ID
7204     m_osInterface->pfnResetPerfBufferID(m_osInterface);
7205 
7206     m_lastTaskInPhase = true;
7207 
7208     if (m_hevcPicParams->CodingType == I_TYPE)
7209     {
7210         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
7211     }
7212     else
7213     {
7214         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
7215     }
7216 
7217     if (m_brcEnabled && m_enableFramePanicMode && (false == m_hevcSeqParams->DisableHRDConformance) &&
7218         m_skipFrameInfo.numSlices != m_numSlices)  // 'numSlices != m_numSlices' check is to re-generate surface if slice layout changed from previous frame
7219     {
7220         CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSkipFrameMbCodeSurface(m_skipFrameInfo));
7221     }
7222 
7223     // Notify PAK engine once ENC is done
7224     if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
7225     {
7226         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
7227         if (m_useMdf)
7228         {
7229             if (!m_computeContextEnabled)
7230             {
7231                 syncParams.GpuContext = MOS_GPU_CONTEXT_RENDER3;  //MDF uses render3
7232             }
7233             else
7234             {
7235                 syncParams.GpuContext = MOS_GPU_CONTEXT_CM_COMPUTE;
7236             }
7237         }
7238         else
7239         {
7240             syncParams.GpuContext = m_renderContext;
7241         }
7242         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
7243 
7244         uint32_t old_stream_index  = m_osInterface->streamIndex;
7245         m_osInterface->streamIndex = static_cast<CmQueueRT *>(m_cmQueue)->StreamIndex();
7246         CODECHAL_ENCODE_CHK_STATUS_RETURN(
7247             m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
7248         m_osInterface->streamIndex = old_stream_index;
7249     }
7250 
7251     if (m_brcEnabled)
7252     {
7253         if (m_hevcSeqParams->ParallelBRC)
7254         {
7255             m_brcBuffers.uiCurrBrcPakStasIdxForRead =
7256                 (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
7257         }
7258     }
7259 
7260     CODECHAL_DEBUG_TOOL(
7261         uint8_t       index;
7262         CODEC_PICTURE refPic;
7263         if (m_useWeightedSurfaceForL0) {
7264             refPic = m_hevcSliceParams->RefPicList[LIST_0][0];
7265             index  = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7266 
7267             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7268                 &m_refList[index]->sRefBuffer,
7269                 CodechalDbgAttr::attrReferenceSurfaces,
7270                 "WP_In_L0")));
7271 
7272             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7273                 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + 0),
7274                 CodechalDbgAttr::attrReferenceSurfaces,
7275                 "WP_Out_L0")));
7276         } if (m_useWeightedSurfaceForL1) {
7277             refPic = m_hevcSliceParams->RefPicList[LIST_1][0];
7278             index  = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7279 
7280             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7281                 &m_refList[index]->sRefBuffer,
7282                 CodechalDbgAttr::attrReferenceSurfaces,
7283                 "WP_In_L1")));
7284 
7285             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7286                 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + 0),
7287                 CodechalDbgAttr::attrReferenceSurfaces,
7288                 "WP_Out_L1")));
7289         })
7290 
7291     m_lastPictureCodingType = m_pictureCodingType;
7292     m_lastRecycledBufIdx    = m_currRecycledBufIdx;
7293 
7294     return eStatus;
7295 }
7296 
EncodeIntraDistKernel()7297 MOS_STATUS CodechalEncHevcStateG12::EncodeIntraDistKernel()
7298 {
7299     CodechalKernelIntraDist::CurbeParam curbeParam;
7300     curbeParam.downScaledWidthInMb4x  = m_downscaledWidthInMb4x;
7301     curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x;
7302 
7303     CodechalKernelIntraDist::SurfaceParams surfaceParam;
7304     surfaceParam.input4xDsSurface =
7305         surfaceParam.input4xDsVmeSurface    = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
7306     surfaceParam.intraDistSurface           = m_brcDistortion;
7307     surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset;
7308     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam));
7309 
7310     return MOS_STATUS_SUCCESS;
7311 }
7312 
InitKernelState()7313 MOS_STATUS CodechalEncHevcStateG12::InitKernelState()
7314 {
7315     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7316 
7317     CODECHAL_ENCODE_FUNCTION_ENTER;
7318 
7319     // Init kernel state
7320     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
7321     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
7322 
7323     // Create weighted prediction kernel state
7324     CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState = MOS_New(CodechalEncodeWPG12, this));
7325     m_wpState->SetKernelBase(m_kernelBase);
7326     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->InitKernelState());
7327     // create intra distortion kernel
7328     m_intraDistKernel = MOS_New(CodechalKernelIntraDist, this);
7329     CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel);
7330     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Initialize(
7331         GetCommonKernelHeaderAndSizeG12,
7332         m_kernelBase,
7333         m_kuidCommon));
7334 
7335     // Create SW scoreboard init kernel state
7336     CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardG12, this));
7337     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState());
7338     // Create Hme kernel
7339     m_hmeKernel = MOS_New(CodechalKernelHmeG12, this);
7340     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
7341     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
7342         GetCommonKernelHeaderAndSizeG12,
7343         m_kernelBase,
7344         m_kuidCommon));
7345 
7346     return eStatus;
7347 }
7348 
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)7349 MOS_STATUS CodechalEncHevcStateG12::SetDmemHuCPakIntegrate(
7350     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
7351 {
7352     CODECHAL_ENCODE_FUNCTION_ENTER;
7353 
7354     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7355 
7356     MOS_LOCK_PARAMS lockFlagsWriteOnly;
7357     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7358     lockFlagsWriteOnly.WriteOnly = true;
7359 
7360     int32_t currentPass = GetCurrentPass();
7361     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES || !m_brcEnabled)
7362     {
7363         eStatus = MOS_STATUS_INVALID_PARAMETER;
7364         return eStatus;
7365     }
7366 
7367     HucPakStitchDmemEncG12 *hucPakStitchDmem = (HucPakStitchDmemEncG12 *)m_osInterface->pfnLockResource(
7368         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
7369     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
7370 
7371     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG12));
7372 
7373     // reset all the offsets to -1
7374     uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
7375                                sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
7376                                sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
7377                                sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
7378                                sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
7379                                sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
7380     MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
7381 
7382     uint16_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
7383     uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7384     CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0);                       //numTileColumns is nonzero and even number; 2 or 4
7385     CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe);  //ucNumPipe is nonzero and even number; 2 or 4
7386     uint16_t numTiles        = numTileRows * numTileColumns;
7387     uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
7388 
7389     hucPakStitchDmem->PicWidthInPixel          = (uint16_t)m_frameWidth;
7390     hucPakStitchDmem->PicHeightInPixel         = (uint16_t)m_frameHeight;
7391     hucPakStitchDmem->TotalNumberOfPAKs        = m_numPipe;
7392     hucPakStitchDmem->Codec                    = 1;  // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
7393     hucPakStitchDmem->MAXPass                  = m_brcEnabled ? (m_numPassesInOnePipe + 1) : 1;
7394     hucPakStitchDmem->CurrentPass              = (uint8_t)currentPass + 1;  // // Current BRC pass [1..MAXPass]
7395     hucPakStitchDmem->MinCUSize                = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
7396     hucPakStitchDmem->CabacZeroWordFlag        = true;                                          // to do: set to true later
7397     hucPakStitchDmem->bitdepth_luma            = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
7398     hucPakStitchDmem->bitdepth_chroma          = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
7399     hucPakStitchDmem->ChromaFormatIdc          = m_hevcSeqParams->chroma_format_idc;
7400     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
7401     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
7402     hucPakStitchDmem->OffsetInCommandBuffer   = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
7403     hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
7404 
7405     hucPakStitchDmem->StitchEnable        = false;
7406     hucPakStitchDmem->StitchCommandOffset = 0;
7407     hucPakStitchDmem->BBEndforStitch      = HUC_BATCH_BUFFER_END;
7408     hucPakStitchDmem->brcUnderFlowEnable  = false;  //temporally disable underflow bit rate control in HUC fw since it need more tuning.
7409 
7410     PCODEC_ENCODER_SLCDATA slcData = m_slcData;
7411     CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
7412     uint32_t totalSliceHeaderSize = 0;
7413     for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
7414     {
7415         totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
7416         slcData++;
7417     }
7418     hucPakStitchDmem->SliceHeaderSizeinBits = totalSliceHeaderSize * 8;
7419     hucPakStitchDmem->currFrameBRClevel     = m_currFrameBrcLevel;
7420 
7421     //Set the kernel output offsets
7422     hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
7423     hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
7424     hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
7425     hucPakStitchDmem->VDENCSTAT_offset[0]      = 0xFFFFFFFF;
7426 
7427     for (auto i = 0; i < m_numPipe; i++)
7428     {
7429         hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
7430 
7431         // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
7432         // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
7433         hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
7434                                                          m_hevcTileStatsOffset.uiTileSizeRecord;
7435         hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) +
7436                                                        m_hevcTileStatsOffset.uiHevcPakStatistics;
7437     }
7438 
7439     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
7440 
7441     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
7442     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
7443     dmemParams->dwDataLength      = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE);
7444     dmemParams->dwDmemOffset      = HUC_DMEM_OFFSET_RTOS_GEMS;
7445 
7446     return eStatus;
7447 }
7448 
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)7449 MOS_STATUS CodechalEncHevcStateG12::SetRegionsHuCPakIntegrate(
7450     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
7451 {
7452     CODECHAL_ENCODE_FUNCTION_ENTER;
7453 
7454     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7455 
7456     int32_t currentPass = GetCurrentPass();
7457     if (currentPass < 0 ||
7458         (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) ||
7459         (currentPass != 0 && m_cqpEnabled))
7460     {
7461         eStatus = MOS_STATUS_INVALID_PARAMETER;
7462         return eStatus;
7463     }
7464 
7465     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
7466     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
7467     // Add Virtual addr
7468     virtualAddrParams->regionParams[0].presRegion  = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
7469     virtualAddrParams->regionParams[0].dwOffset    = 0;
7470     virtualAddrParams->regionParams[1].presRegion  = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
7471     virtualAddrParams->regionParams[1].isWritable  = true;
7472     virtualAddrParams->regionParams[4].presRegion  = &m_resBitstreamBuffer;  // Region 4 - Last Tile bitstream
7473     virtualAddrParams->regionParams[5].presRegion  = &m_resBitstreamBuffer;  // Region 5 - HuC modifies the last tile bitstream before stitch command
7474     virtualAddrParams->regionParams[5].isWritable  = true;
7475     virtualAddrParams->regionParams[6].presRegion  = &m_brcBuffers.resBrcHistoryBuffer;  // Region 6  History Buffer (Input/Output)
7476     virtualAddrParams->regionParams[6].isWritable  = true;
7477     virtualAddrParams->regionParams[7].presRegion  = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];  //&m_resHucPakStitchReadBatchBuffer;             // Region 7 - HCP PIC state command
7478     virtualAddrParams->regionParams[9].presRegion  = &m_resBrcDataBuffer;                                               // Region 9  HuC outputs BRC data
7479     virtualAddrParams->regionParams[9].isWritable  = true;
7480     virtualAddrParams->regionParams[8].presRegion  = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass];  // Region 8 - data buffer read by HUC for stitching cmd generation
7481     virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;                         // Region 10 - SLB for stitching cmd output from Huc
7482     virtualAddrParams->regionParams[10].isWritable = true;
7483     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;  // Region 15 [In/Out] - Tile Record Buffer
7484     virtualAddrParams->regionParams[15].dwOffset   = 0;
7485 
7486     return eStatus;
7487 }
7488 
SetDmemHuCPakIntegrateCqp(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)7489 MOS_STATUS CodechalEncHevcStateG12::SetDmemHuCPakIntegrateCqp(
7490     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
7491 {
7492     CODECHAL_ENCODE_FUNCTION_ENTER;
7493 
7494     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7495 
7496     MOS_LOCK_PARAMS lockFlagsWriteOnly;
7497     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7498     lockFlagsWriteOnly.WriteOnly = true;
7499 
7500     int32_t currentPass = GetCurrentPass();
7501     if (currentPass != 0 || (!m_cqpEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ))
7502     {
7503         eStatus = MOS_STATUS_INVALID_PARAMETER;
7504         return eStatus;
7505     }
7506 
7507     HucPakStitchDmemEncG12 *hucPakStitchDmem = (HucPakStitchDmemEncG12 *)m_osInterface->pfnLockResource(
7508         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
7509     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
7510 
7511     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG12));
7512 
7513     // reset all the offsets to -1
7514     uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
7515                                sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
7516                                sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
7517                                sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
7518                                sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
7519                                sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
7520     MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
7521 
7522     uint16_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
7523     uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7524     CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0);                       //numTileColumns is nonzero and even number; 2 or 4
7525     CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe);  //ucNumPipe is nonzero and even number; 2 or 4
7526     uint16_t numTiles        = numTileRows * numTileColumns;
7527     uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
7528 
7529     hucPakStitchDmem->PicWidthInPixel          = (uint16_t)m_frameWidth;
7530     hucPakStitchDmem->PicHeightInPixel         = (uint16_t)m_frameHeight;
7531     hucPakStitchDmem->TotalNumberOfPAKs        = m_numPipe;
7532     hucPakStitchDmem->Codec                    = 2;  //HEVC DP CQP
7533     hucPakStitchDmem->MAXPass                  = 1;
7534     hucPakStitchDmem->CurrentPass              = 1;
7535     hucPakStitchDmem->MinCUSize                = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
7536     hucPakStitchDmem->CabacZeroWordFlag        = true;
7537     hucPakStitchDmem->bitdepth_luma            = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
7538     hucPakStitchDmem->bitdepth_chroma          = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
7539     hucPakStitchDmem->ChromaFormatIdc          = m_hevcSeqParams->chroma_format_idc;
7540     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
7541     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
7542     hucPakStitchDmem->OffsetInCommandBuffer   = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
7543     hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
7544 
7545     hucPakStitchDmem->StitchEnable        = false;
7546     hucPakStitchDmem->StitchCommandOffset = 0;
7547     hucPakStitchDmem->BBEndforStitch      = HUC_BATCH_BUFFER_END;
7548 
7549     //Set the kernel output offsets
7550     hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
7551     hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = 0xFFFFFFFF;
7552     hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
7553     hucPakStitchDmem->VDENCSTAT_offset[0]      = 0xFFFFFFFF;
7554 
7555     for (auto i = 0; i < m_numPipe; i++)
7556     {
7557         hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
7558 
7559         // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
7560         // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
7561         hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
7562                                                          m_hevcTileStatsOffset.uiTileSizeRecord;
7563     }
7564 
7565     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
7566 
7567     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
7568     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
7569     dmemParams->dwDataLength      = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE);
7570     dmemParams->dwDmemOffset      = HUC_DMEM_OFFSET_RTOS_GEMS;
7571 
7572     return eStatus;
7573 }
7574 
ConfigStitchDataBuffer()7575 MOS_STATUS CodechalEncHevcStateG12::ConfigStitchDataBuffer()
7576 {
7577     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7578     CODECHAL_ENCODE_FUNCTION_ENTER;
7579     int32_t currentPass = GetCurrentPass();
7580     if (currentPass < 0 ||
7581         (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) ||
7582         (currentPass != 0 && m_cqpEnabled))
7583     {
7584         eStatus = MOS_STATUS_INVALID_PARAMETER;
7585         return eStatus;
7586     }
7587 
7588     MOS_LOCK_PARAMS lockFlagsWriteOnly;
7589     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7590     lockFlagsWriteOnly.WriteOnly = 1;
7591 
7592     HucCommandData *hucStitchDataBuf = (HucCommandData *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
7593     CODECHAL_ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
7594 
7595     MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
7596     hucStitchDataBuf->TotalCommands          = 1;
7597     hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
7598 
7599     HucInputCmdG12 hucInputCmd;
7600     MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
7601 
7602     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
7603     hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
7604     hucInputCmd.CmdMode             = HUC_CMD_LIST_MODE;
7605     hucInputCmd.LengthOfTable       = (uint8_t)(m_numTiles);
7606     hucInputCmd.CopySize            = m_hwInterface->m_tileRecordSize;
7607     ;
7608 
7609     PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;
7610 
7611     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
7612         m_osInterface,
7613         presSrc,
7614         false,
7615         false));
7616     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
7617         m_osInterface,
7618         &m_resBitstreamBuffer,
7619         true,
7620         true));
7621 
7622     uint64_t srcAddr          = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
7623     uint64_t destAddr         = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
7624     hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
7625     hucInputCmd.SrcAddrTop    = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
7626 
7627     hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
7628     hucInputCmd.DestAddrTop    = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
7629 
7630     MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
7631 
7632     m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
7633 
7634     return eStatus;
7635 }
7636 
SetRegionsHuCPakIntegrateCqp(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)7637 MOS_STATUS CodechalEncHevcStateG12::SetRegionsHuCPakIntegrateCqp(
7638     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
7639 {
7640     CODECHAL_ENCODE_FUNCTION_ENTER;
7641 
7642     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7643 
7644     int32_t currentPass = GetCurrentPass();
7645     if (currentPass < 0 ||
7646         (m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ && m_brcEnabled) ||
7647         (currentPass != 0 && m_cqpEnabled))
7648     {
7649         eStatus = MOS_STATUS_INVALID_PARAMETER;
7650         return eStatus;
7651     }
7652     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
7653 
7654     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
7655 
7656     // Add Virtual addr
7657     virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
7658     virtualAddrParams->regionParams[0].dwOffset   = 0;
7659     virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
7660     virtualAddrParams->regionParams[1].isWritable = true;
7661     virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer;  // Region 4 - Last Tile bitstream
7662     virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer;  // Region 5 - HuC modifies the last tile bitstream before stitch command
7663     virtualAddrParams->regionParams[5].isWritable = true;
7664     virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer;  // Region 6  History Buffer (Input/Output)
7665     virtualAddrParams->regionParams[6].isWritable = true;
7666     virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];  //&m_resHucPakStitchReadBatchBuffer;             // Region 7 - HCP PIC state command
7667 
7668     virtualAddrParams->regionParams[9].presRegion  = &m_resBrcDataBuffer;  // Region 9  HuC outputs BRC data
7669     virtualAddrParams->regionParams[9].isWritable  = true;
7670     virtualAddrParams->regionParams[8].presRegion  = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass];  // Region 8 - data buffer read by HUC for stitching cmd generation
7671     virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;                         // Region 10 - SLB for stitching cmd output from Huc
7672     virtualAddrParams->regionParams[10].isWritable = true;
7673     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;  // Region 15 [In/Out] - Tile Record Buffer
7674     virtualAddrParams->regionParams[15].dwOffset   = 0;
7675 
7676     return eStatus;
7677 }
7678 
7679 #if (_DEBUG || _RELEASE_INTERNAL)
ResetImgCtrlRegInPAKStatisticsBuffer(PMOS_COMMAND_BUFFER cmdBuffer)7680 MOS_STATUS CodechalEncHevcStateG12::ResetImgCtrlRegInPAKStatisticsBuffer(
7681     PMOS_COMMAND_BUFFER cmdBuffer)
7682 {
7683     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7684 
7685     CODECHAL_ENCODE_FUNCTION_ENTER;
7686 
7687     MHW_MI_STORE_DATA_PARAMS storeDataParams;
7688     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7689     storeDataParams.pOsResource      = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7690     storeDataParams.dwResourceOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
7691     storeDataParams.dwValue          = 0;
7692     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
7693         cmdBuffer,
7694         &storeDataParams));
7695 
7696     return eStatus;
7697 }
7698 #endif
7699 
ReadBrcPakStatisticsForScalability(PMOS_COMMAND_BUFFER cmdBuffer)7700 MOS_STATUS CodechalEncHevcStateG12::ReadBrcPakStatisticsForScalability(
7701     PMOS_COMMAND_BUFFER cmdBuffer)
7702 {
7703     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7704 
7705     CODECHAL_ENCODE_FUNCTION_ENTER;
7706 
7707     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7708 
7709     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
7710     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
7711     miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
7712     miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCount);
7713     miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7714     miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME);
7715     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
7716 
7717     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
7718     miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
7719     miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCountNoHeader);
7720     miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7721     miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER);
7722     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
7723 
7724     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
7725     miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
7726     miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
7727     miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7728     miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
7729     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
7730 
7731     uint32_t dwOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
7732                         m_encodeStatusBuf.dwNumPassesOffset +  // Num passes offset
7733                         sizeof(uint32_t) * 2;                  // encodeStatus is offset by 2 DWs in the resource
7734 
7735     MHW_MI_STORE_DATA_PARAMS storeDataParams;
7736     storeDataParams.pOsResource      = &m_encodeStatusBuf.resStatusBuffer;
7737     storeDataParams.dwResourceOffset = dwOffset;
7738     storeDataParams.dwValue          = (uint8_t)GetCurrentPass();
7739     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
7740 
7741     return eStatus;
7742 }
7743 
DumpHucDebugOutputBuffers()7744 MOS_STATUS CodechalEncHevcStateG12::DumpHucDebugOutputBuffers()
7745 {
7746     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7747 
7748     //only dump HuC in/out buffers in brc scalability case
7749     bool dumpDebugBuffers = IsLastPipe() && (m_numPipe >= 2) && m_brcEnabled;
7750     if (m_singleTaskPhaseSupported)
7751     {
7752         dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
7753     }
7754 
7755     if (dumpDebugBuffers)
7756     {
7757         CODECHAL_DEBUG_TOOL(
7758             int32_t currentPass = GetCurrentPass();
7759             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
7760                 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
7761                 sizeof(HucPakStitchDmemEncG12),
7762                 currentPass,
7763                 hucRegionDumpPakIntegrate));
7764 
7765             // Region 7 - HEVC PIC State Command
7766             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7767                 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
7768                 0,
7769                 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
7770                 7,
7771                 "_PicState",
7772                 true,
7773                 currentPass,
7774                 hucRegionDumpPakIntegrate));
7775 
7776             // Region 5 -  Last Tile PAK Bitstream Output
7777             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7778                 &m_resBitstreamBuffer,
7779                 0,
7780                 m_encodeParams.dwBitstreamSize,
7781                 5,
7782                 "_Bitstream",
7783                 false,
7784                 currentPass,
7785                 hucRegionDumpPakIntegrate));
7786 
7787             // Region 6 - BRC History buffer
7788             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7789                 &m_brcBuffers.resBrcHistoryBuffer,
7790                 0,
7791                 m_brcHistoryBufferSize,
7792                 6,
7793                 "_HistoryBuffer",
7794                 false,
7795                 currentPass,
7796                 hucRegionDumpPakIntegrate));
7797             // Region 9 - HCP BRC Data Output
7798             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7799                 &m_resBrcDataBuffer,
7800                 0,
7801                 CODECHAL_CACHELINE_SIZE,
7802                 9,
7803                 "_HcpBrcData",
7804                 false,
7805                 currentPass,
7806                 hucRegionDumpPakIntegrate));
7807             // Region 1 - Output Aggregated Frame Level Statistics
7808             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7809                 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
7810                 0,
7811                 m_hwInterface->m_pakIntAggregatedFrameStatsSize,  // program exact out size
7812                 1,
7813                 "_AggregateFrameStats",
7814                 false,
7815                 currentPass,
7816                 hucRegionDumpPakIntegrate));
7817             // Region 0 - Tile Statistics Constant Buffer
7818             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7819                 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
7820                 0,
7821                 m_hwInterface->m_pakIntTileStatsSize,
7822                 0,
7823                 "_TileBasedStats",
7824                 true,
7825                 currentPass,
7826                 hucRegionDumpPakIntegrate));
7827             // Region 15 - Tile Record Buffer
7828             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7829                 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
7830                 0,
7831                 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
7832                 15,
7833                 "_TileRecord",
7834                 false,
7835                 currentPass,
7836                 hucRegionDumpPakIntegrate));)
7837     }
7838 
7839     return eStatus;
7840 }
7841 
CodechalEncHevcStateG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)7842 CodechalEncHevcStateG12::CodechalEncHevcStateG12(
7843     CodechalHwInterface *   hwInterface,
7844     CodechalDebugInterface *debugInterface,
7845     PCODECHAL_STANDARD_INFO standardInfo)
7846     : CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
7847 {
7848     m_2xMeSupported =
7849         m_useCommonKernel = true;
7850     m_useHwScoreboard     = false;
7851 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
7852     m_kernelBase          = (uint8_t *)IGCODECKRN_G12;
7853 #else
7854     m_kernelBase          = nullptr;
7855 #endif
7856     m_kuidCommon          = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
7857     m_hucPakStitchEnabled = true;
7858     m_scalabilityState    = nullptr;
7859 
7860     MOS_ZeroMemory(&m_currPicWithReconBoundaryPix, sizeof(m_currPicWithReconBoundaryPix));
7861     MOS_ZeroMemory(&m_lcuLevelInputDataSurface, sizeof(m_lcuLevelInputDataSurface));
7862     MOS_ZeroMemory(&m_encoderHistoryInputBuffer, sizeof(m_encoderHistoryInputBuffer));
7863     MOS_ZeroMemory(&m_encoderHistoryOutputBuffer, sizeof(m_encoderHistoryOutputBuffer));
7864     MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu32, sizeof(m_intermediateCuRecordSurfaceLcu32));
7865     MOS_ZeroMemory(&m_scratchSurface, sizeof(m_scratchSurface));
7866     MOS_ZeroMemory(&m_16x16QpInputData, sizeof(m_16x16QpInputData));
7867     MOS_ZeroMemory(m_debugSurface, sizeof(m_debugSurface));
7868     MOS_ZeroMemory(&m_encConstantTableForB, sizeof(m_encConstantTableForB));
7869     MOS_ZeroMemory(&m_mvAndDistortionSumSurface, sizeof(m_mvAndDistortionSumSurface));
7870     MOS_ZeroMemory(m_encBCombinedBuffer1, sizeof(m_encBCombinedBuffer1));
7871     MOS_ZeroMemory(m_encBCombinedBuffer2, sizeof(m_encBCombinedBuffer2));
7872 
7873     MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
7874     MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
7875     MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
7876     MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
7877     MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
7878     MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
7879     MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
7880 
7881     MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
7882     MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
7883     MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
7884     MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
7885     MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
7886     MOS_ZeroMemory(&m_resPipeCompleteSemaMem, sizeof(m_resPipeCompleteSemaMem));
7887     MOS_ZeroMemory(m_resHucPakStitchDmemBuffer, sizeof(m_resHucPakStitchDmemBuffer));
7888     MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
7889     MOS_ZeroMemory(&m_skipFrameInfo.m_resMbCodeSkipFrameSurface, sizeof(m_skipFrameInfo.m_resMbCodeSkipFrameSurface));
7890 
7891     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
7892     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
7893     m_hwInterface->GetStateHeapSettings()->dwDshSize     = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
7894 
7895     m_kuid             = IDR_CODEC_HEVC_COMBINED_KENREL_INTEL;
7896     MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
7897         m_kernelBase,
7898         m_kuid,
7899         &m_kernelBinary,
7900         &m_combinedKernelSize);
7901     CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
7902 
7903     m_hwInterface->GetStateHeapSettings()->dwIshSize +=
7904         MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
7905 
7906     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
7907 
7908     Mos_SetVirtualEngineSupported(m_osInterface, true);
7909 }
7910 
~CodechalEncHevcStateG12()7911 CodechalEncHevcStateG12::~CodechalEncHevcStateG12()
7912 {
7913     CODECHAL_ENCODE_FUNCTION_ENTER;
7914 
7915     if (m_wpState)
7916     {
7917         MOS_Delete(m_wpState);
7918         m_wpState = nullptr;
7919     }
7920 
7921     if (m_intraDistKernel)
7922     {
7923         MOS_Delete(m_intraDistKernel);
7924         m_intraDistKernel = nullptr;
7925     }
7926 
7927     if (m_swScoreboardState)
7928     {
7929         MOS_Delete(m_swScoreboardState);
7930         m_swScoreboardState = nullptr;
7931     }
7932 
7933     if (m_scalabilityState)
7934     {
7935         MOS_FreeMemAndSetNull(m_scalabilityState);
7936     }
7937 
7938 #if (_DEBUG || _RELEASE_INTERNAL)
7939     if (m_statusReportDebugInterface != nullptr)
7940     {
7941         MOS_Delete(m_statusReportDebugInterface);
7942         m_statusReportDebugInterface = nullptr;
7943     }
7944 #endif
7945 }
7946 
Allocate(CodechalSetting * codecHalSettings)7947 MOS_STATUS CodechalEncHevcStateG12::Allocate(CodechalSetting *codecHalSettings)
7948 {
7949 #if (_DEBUG || _RELEASE_INTERNAL)
7950     if (!m_statusReportDebugInterface)
7951     {
7952         m_statusReportDebugInterface = MOS_New(CodechalDebugInterface);
7953         CODECHAL_ENCODE_CHK_NULL_RETURN(m_statusReportDebugInterface);
7954         CODECHAL_ENCODE_CHK_STATUS_RETURN(
7955             m_statusReportDebugInterface->Initialize(m_hwInterface, codecHalSettings->codecFunction));
7956     }
7957 #endif
7958 
7959     return CodechalEncoderState::Allocate(codecHalSettings);
7960 }
7961 
CodecHalHevc_GetFileSize(char * fileName)7962 uint32_t CodechalEncHevcStateG12::CodecHalHevc_GetFileSize(char *fileName)
7963 {
7964     FILE *   fp       = nullptr;
7965     uint32_t fileSize = 0;
7966     MosUtilities::MosSecureFileOpen(&fp, fileName, "rb");
7967     if (fp == nullptr)
7968     {
7969         return 0;
7970     }
7971     fseek(fp, 0, SEEK_END);
7972     fileSize = ftell(fp);
7973     fseek(fp, 0, SEEK_SET);
7974     fclose(fp);
7975 
7976     return fileSize;
7977 }
7978 
LoadSourceAndRef2xDSFromFile(PMOS_SURFACE pRef2xSurface,PMOS_SURFACE pSrc2xSurface,uint8_t reflist,uint8_t refIdx)7979 MOS_STATUS CodechalEncHevcStateG12::LoadSourceAndRef2xDSFromFile(
7980     PMOS_SURFACE pRef2xSurface,
7981     PMOS_SURFACE pSrc2xSurface,
7982     uint8_t      reflist,
7983     uint8_t      refIdx)
7984 {
7985     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7986 
7987     CODECHAL_ENCODE_FUNCTION_ENTER;
7988 
7989     if (m_loadKernelInput == false || (pSrc2xSurface && Mos_ResourceIsNull(&pSrc2xSurface->OsResource)) ||
7990         (pRef2xSurface && Mos_ResourceIsNull(&pRef2xSurface->OsResource)) ||
7991         (pSrc2xSurface == NULL && pRef2xSurface == NULL))
7992     {
7993         return eStatus;
7994     }
7995 
7996     char pathOfRef2xDSCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
7997     MOS_SecureStringPrint(pathOfRef2xDSCmd,
7998         sizeof(pathOfRef2xDSCmd),
7999         sizeof(pathOfRef2xDSCmd),
8000         "%s\\Ref2xDSL%1d%1d.dat.%d",
8001         m_loadKernelInputDataFolder,
8002         reflist,
8003         refIdx,
8004         m_frameNum);
8005     char pathOfSrc2xDSCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
8006     MOS_SecureStringPrint(pathOfSrc2xDSCmd,
8007         sizeof(pathOfSrc2xDSCmd),
8008         sizeof(pathOfSrc2xDSCmd),
8009         "%s\\Src2xDS.dat.%d",
8010         m_loadKernelInputDataFolder,
8011         m_frameNum);
8012 
8013     uint32_t sizeRef2xDS = CodecHalHevc_GetFileSize(pathOfRef2xDSCmd);
8014     uint32_t sizeSrc2xDS = CodecHalHevc_GetFileSize(pathOfSrc2xDSCmd);
8015     if (sizeRef2xDS == 0 && sizeSrc2xDS == 0)
8016         return MOS_STATUS_SUCCESS;
8017     MOS_LOCK_PARAMS lockFlags;
8018 
8019     if (pRef2xSurface && sizeRef2xDS)
8020     {
8021         if (sizeRef2xDS > (pRef2xSurface->dwPitch * pRef2xSurface->dwHeight * 3 / 2))
8022         {
8023             return MOS_STATUS_INVALID_FILE_SIZE;
8024         }
8025         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8026         lockFlags.WriteOnly = 1;
8027         uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
8028             m_osInterface, &pRef2xSurface->OsResource, &lockFlags);
8029         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8030 
8031         FILE *Ref2xDS = nullptr;
8032         eStatus       = MosUtilities::MosSecureFileOpen(&Ref2xDS, pathOfRef2xDSCmd, "rb");
8033         if (Ref2xDS == nullptr)
8034         {
8035             m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource);
8036             return eStatus;
8037         }
8038 
8039         uint32_t sizeToRead = sizeRef2xDS * 2 / 3;
8040         if (sizeToRead != fread((void *)data, 1, sizeToRead, Ref2xDS))
8041         {
8042             fclose(Ref2xDS);
8043             m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource);
8044             return MOS_STATUS_INVALID_FILE_SIZE;
8045         }
8046         fclose(Ref2xDS);
8047         //MOS_ZeroMemory(data + sizeToRead, sizeRef2xDS-sizeToRead);
8048 
8049         m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource);
8050     }
8051 
8052     if (pSrc2xSurface && sizeSrc2xDS)
8053     {
8054         if (sizeSrc2xDS > (pSrc2xSurface->dwPitch * pSrc2xSurface->dwHeight * 3 / 2))
8055         {
8056             return MOS_STATUS_INVALID_FILE_SIZE;
8057         }
8058 
8059         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8060         lockFlags.WriteOnly = 1;
8061         uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
8062             m_osInterface, &pSrc2xSurface->OsResource, &lockFlags);
8063         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8064 
8065         FILE *Src2xDS = nullptr;
8066         eStatus       = MosUtilities::MosSecureFileOpen(&Src2xDS, pathOfSrc2xDSCmd, "rb");
8067         if (Src2xDS == nullptr)
8068         {
8069             m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource);
8070             return eStatus;
8071         }
8072 
8073         uint32_t sizeToRead = sizeSrc2xDS * 2 / 3;
8074         if (sizeToRead != fread((void *)data, 1, sizeToRead, Src2xDS))
8075         {
8076             fclose(Src2xDS);
8077             m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource);
8078             return MOS_STATUS_INVALID_FILE_SIZE;
8079         }
8080         fclose(Src2xDS);
8081         //MOS_ZeroMemory(data + sizeToRead, sizeRef2xDS-sizeToRead);
8082 
8083         m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource);
8084     }
8085 
8086     return eStatus;
8087 }
8088 
LoadPakCommandAndCuRecordFromFile()8089 MOS_STATUS CodechalEncHevcStateG12::LoadPakCommandAndCuRecordFromFile()
8090 {
8091     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8092 
8093     CODECHAL_ENCODE_FUNCTION_ENTER;
8094 
8095     char pathOfPakCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
8096     MOS_SecureStringPrint(pathOfPakCmd,
8097         sizeof(pathOfPakCmd),
8098         sizeof(pathOfPakCmd),
8099         "%s\\PAKObj.dat.%d",
8100         m_pakOnlyDataFolder,
8101         m_frameNum);
8102 
8103     char pathOfCuRecord[MOS_USER_CONTROL_MAX_DATA_SIZE];
8104     MOS_SecureStringPrint(pathOfCuRecord,
8105         sizeof(pathOfCuRecord),
8106         sizeof(pathOfCuRecord),
8107         "%s\\CURecord.dat.%d",
8108         m_pakOnlyDataFolder,
8109         m_frameNum);
8110 
8111     uint32_t sizePakObj = CodecHalHevc_GetFileSize(pathOfPakCmd);
8112     if (sizePakObj == 0 || sizePakObj > m_mvOffset)
8113     {
8114         return MOS_STATUS_INVALID_FILE_SIZE;
8115     }
8116 
8117     uint32_t sizeCuRecord = CodecHalHevc_GetFileSize(pathOfCuRecord);
8118     if (sizeCuRecord == 0 || sizeCuRecord > m_mbCodeSize - m_mvOffset)
8119     {
8120         return MOS_STATUS_INVALID_FILE_SIZE;
8121     }
8122 
8123     MOS_LOCK_PARAMS lockFlags;
8124     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8125     lockFlags.WriteOnly = 1;
8126     uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
8127         m_osInterface, &m_resMbCodeSurface, &lockFlags);
8128     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8129 
8130     FILE *pakObj = nullptr;
8131     eStatus      = MosUtilities::MosSecureFileOpen(&pakObj, pathOfPakCmd, "rb");
8132     if (pakObj == nullptr)
8133     {
8134         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8135         return eStatus;
8136     }
8137 
8138     uint8_t *pakCmd = data;
8139     if (sizePakObj != fread((void *)pakCmd, 1, sizePakObj, pakObj))
8140     {
8141         fclose(pakObj);
8142         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8143         return MOS_STATUS_INVALID_FILE_SIZE;
8144     }
8145     fclose(pakObj);
8146 
8147     uint8_t *record  = data + m_mvOffset;
8148     FILE *   fRecord = nullptr;
8149     eStatus          = MosUtilities::MosSecureFileOpen(&fRecord, pathOfCuRecord, "rb");
8150     if (fRecord == nullptr)
8151     {
8152         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8153         return eStatus;
8154     }
8155 
8156     if (sizeCuRecord != fread((void *)record, 1, sizeCuRecord, fRecord))
8157     {
8158         fclose(fRecord);
8159         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8160         return MOS_STATUS_INVALID_FILE_SIZE;
8161     }
8162     fclose(fRecord);
8163 
8164     m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8165 
8166     if (m_brcEnabled)
8167     {
8168         //Image State
8169         char pathOfPicState[MOS_USER_CONTROL_MAX_DATA_SIZE];
8170         MOS_SecureStringPrint(pathOfPicState,
8171             sizeof(pathOfPicState),
8172             sizeof(pathOfPicState),
8173             "%s\\BrcUpdate_ImgStateWrite.dat.%d",
8174             m_pakOnlyDataFolder,
8175             m_frameNum);
8176 
8177         int32_t tmpSizePicState = CodecHalHevc_GetFileSize(pathOfPicState);
8178         uint32_t sizePicState   = 0;
8179         if (tmpSizePicState <= 0)
8180         {
8181             return MOS_STATUS_INVALID_FILE_SIZE;
8182         }
8183         else
8184         {
8185             sizePicState = static_cast<uint32_t>(tmpSizePicState);
8186         }
8187 
8188         data = (uint8_t *)m_osInterface->pfnLockResource(
8189             m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], &lockFlags);
8190         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8191 
8192         FILE *fPicState = nullptr;
8193         eStatus         = MosUtilities::MosSecureFileOpen(&fPicState, pathOfPicState, "rb");
8194         if (fPicState == nullptr)
8195         {
8196             m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]);
8197             return eStatus;
8198         }
8199 
8200         if (sizePicState != fread((void *)data, 1, sizePicState, fPicState))
8201         {
8202             fclose(fPicState);
8203             m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]);
8204             return MOS_STATUS_INVALID_FILE_SIZE;
8205         }
8206         fclose(fPicState);
8207         m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]);
8208     }
8209 
8210     return eStatus;
8211 }
8212 
PicCodingTypeToSliceType(uint16_t pictureCodingType)8213 uint8_t CodechalEncHevcStateG12::PicCodingTypeToSliceType(uint16_t pictureCodingType)
8214 {
8215     uint8_t sliceType = 0;
8216 
8217     switch (pictureCodingType)
8218     {
8219     case I_TYPE:
8220         sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
8221         break;
8222     case P_TYPE:
8223         sliceType = CODECHAL_ENCODE_HEVC_P_SLICE;
8224         break;
8225     case B_TYPE:
8226     case B1_TYPE:
8227     case B2_TYPE:
8228         sliceType = CODECHAL_ENCODE_HEVC_B_SLICE;
8229         break;
8230     default:
8231         CODECHAL_ENCODE_ASSERT(false);
8232     }
8233     return sliceType;
8234 }
8235 
8236 // The following code is from the kernel ULT
InitMediaObjectWalker(uint32_t threadSpaceWidth,uint32_t threadSpaceHeight,uint32_t colorCountMinusOne,DependencyPattern dependencyPattern,uint32_t childThreadNumber,uint32_t localLoopExecCount,MHW_WALKER_PARAMS & walkerParams)8237 MOS_STATUS CodechalEncHevcStateG12::InitMediaObjectWalker(
8238     uint32_t           threadSpaceWidth,
8239     uint32_t           threadSpaceHeight,
8240     uint32_t           colorCountMinusOne,
8241     DependencyPattern  dependencyPattern,
8242     uint32_t           childThreadNumber,
8243     uint32_t           localLoopExecCount,
8244     MHW_WALKER_PARAMS &walkerParams)
8245 {
8246     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8247 
8248     walkerParams.ColorCountMinusOne    = colorCountMinusOne;
8249     walkerParams.dwGlobalLoopExecCount = 0x3ff;
8250     walkerParams.dwLocalLoopExecCount  = 0x3ff;
8251 
8252     if (dependencyPattern == dependencyWavefrontHorizontal)
8253     {
8254         // Global
8255         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8256         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8257         walkerParams.GlobalStart.x            = 0;
8258         walkerParams.GlobalStart.y            = 0;
8259         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8260         walkerParams.GlobalOutlerLoopStride.y = 0;
8261         walkerParams.GlobalInnerLoopUnit.x    = 0;
8262         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8263 
8264         // Local
8265         walkerParams.BlockResolution.x    = threadSpaceWidth;
8266         walkerParams.BlockResolution.y    = threadSpaceHeight;
8267         walkerParams.LocalStart.x         = 0;
8268         walkerParams.LocalStart.y         = 0;
8269         walkerParams.LocalOutLoopStride.x = 1;
8270         walkerParams.LocalOutLoopStride.y = 0;
8271         walkerParams.LocalInnerLoopUnit.x = 0;
8272         walkerParams.LocalInnerLoopUnit.y = 1;
8273 
8274         // Mid
8275         walkerParams.MiddleLoopExtraSteps = 0;
8276         walkerParams.MidLoopUnitX         = 0;
8277         walkerParams.MidLoopUnitY         = 0;
8278     }
8279     else if (dependencyPattern == dependencyWavefrontVertical)
8280     {
8281         // Global
8282         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8283         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8284         walkerParams.GlobalStart.x            = 0;
8285         walkerParams.GlobalStart.y            = 0;
8286         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8287         walkerParams.GlobalOutlerLoopStride.y = 0;
8288         walkerParams.GlobalInnerLoopUnit.x    = 0;
8289         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8290 
8291         // Local
8292         walkerParams.BlockResolution.x    = threadSpaceWidth;
8293         walkerParams.BlockResolution.y    = threadSpaceHeight;
8294         walkerParams.LocalStart.x         = 0;
8295         walkerParams.LocalStart.y         = 0;
8296         walkerParams.LocalOutLoopStride.x = 0;
8297         walkerParams.LocalOutLoopStride.y = 1;
8298         walkerParams.LocalInnerLoopUnit.x = 1;
8299         walkerParams.LocalInnerLoopUnit.y = 0;
8300 
8301         // Mid
8302         walkerParams.MiddleLoopExtraSteps = 0;
8303         walkerParams.MidLoopUnitX         = 0;
8304         walkerParams.MidLoopUnitY         = 0;
8305     }
8306     else if (dependencyPattern == dependencyWavefront45Degree)
8307     {
8308         // Global
8309         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8310         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8311         walkerParams.GlobalStart.x            = 0;
8312         walkerParams.GlobalStart.y            = 0;
8313         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8314         walkerParams.GlobalOutlerLoopStride.y = 0;
8315         walkerParams.GlobalInnerLoopUnit.x    = 0;
8316         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8317 
8318         // Local
8319         walkerParams.BlockResolution.x    = threadSpaceWidth;
8320         walkerParams.BlockResolution.y    = threadSpaceHeight;
8321         walkerParams.LocalStart.x         = 0;
8322         walkerParams.LocalStart.y         = 0;
8323         walkerParams.LocalOutLoopStride.x = 1;
8324         walkerParams.LocalOutLoopStride.y = 0;
8325         walkerParams.LocalInnerLoopUnit.x = -1;
8326         walkerParams.LocalInnerLoopUnit.y = 1;
8327 
8328         // Mid
8329         walkerParams.MiddleLoopExtraSteps = 0;
8330         walkerParams.MidLoopUnitX         = 0;
8331         walkerParams.MidLoopUnitY         = 0;
8332     }
8333     else if (dependencyPattern == dependencyWavefront26Degree)
8334     {
8335         // Global
8336         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8337         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8338         walkerParams.GlobalStart.x            = 0;
8339         walkerParams.GlobalStart.y            = 0;
8340         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8341         walkerParams.GlobalOutlerLoopStride.y = 0;
8342         walkerParams.GlobalInnerLoopUnit.x    = 0;
8343         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8344 
8345         // Local
8346         walkerParams.BlockResolution.x    = threadSpaceWidth;
8347         walkerParams.BlockResolution.y    = threadSpaceHeight;
8348         walkerParams.LocalStart.x         = 0;
8349         walkerParams.LocalStart.y         = 0;
8350         walkerParams.LocalOutLoopStride.x = 1;
8351         walkerParams.LocalOutLoopStride.y = 0;
8352         walkerParams.LocalInnerLoopUnit.x = -2;
8353         walkerParams.LocalInnerLoopUnit.y = 1;
8354 
8355         // Mid
8356         walkerParams.MiddleLoopExtraSteps = 0;
8357         walkerParams.MidLoopUnitX         = 0;
8358         walkerParams.MidLoopUnitY         = 0;
8359     }
8360     else if ((dependencyPattern == dependencyWavefront45XDegree) ||
8361              (dependencyPattern == dependencyWavefront45XDegreeAlt))
8362     {
8363         // Global
8364         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8365         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8366         walkerParams.GlobalStart.x            = 0;
8367         walkerParams.GlobalStart.y            = 0;
8368         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8369         walkerParams.GlobalOutlerLoopStride.y = 0;
8370         walkerParams.GlobalInnerLoopUnit.x    = 0;
8371         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8372 
8373         // Local
8374         walkerParams.BlockResolution.x    = threadSpaceWidth;
8375         walkerParams.BlockResolution.y    = threadSpaceHeight;
8376         walkerParams.LocalStart.x         = 0;
8377         walkerParams.LocalStart.y         = 0;
8378         walkerParams.LocalOutLoopStride.x = 1;
8379         walkerParams.LocalOutLoopStride.y = 0;
8380         walkerParams.LocalInnerLoopUnit.x = -1;
8381         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8382 
8383         // Mid
8384         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8385         walkerParams.MidLoopUnitX         = 0;
8386         walkerParams.MidLoopUnitY         = 1;
8387     }
8388     else if ((dependencyPattern == dependencyWavefront26XDegree) ||
8389              (dependencyPattern == dependencyWavefront26XDegreeAlt))
8390     {
8391         // Global
8392         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8393         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8394         walkerParams.GlobalStart.x            = 0;
8395         walkerParams.GlobalStart.y            = 0;
8396         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8397         walkerParams.GlobalOutlerLoopStride.y = 0;
8398         walkerParams.GlobalInnerLoopUnit.x    = 0;
8399         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8400 
8401         // Local
8402         walkerParams.BlockResolution.x    = threadSpaceWidth;
8403         walkerParams.BlockResolution.y    = threadSpaceHeight;
8404         walkerParams.LocalStart.x         = 0;
8405         walkerParams.LocalStart.y         = 0;
8406         walkerParams.LocalOutLoopStride.x = 1;
8407         walkerParams.LocalOutLoopStride.y = 0;
8408         walkerParams.LocalInnerLoopUnit.x = -2;
8409         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8410 
8411         // Mid
8412         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8413         walkerParams.MidLoopUnitX         = 0;
8414         walkerParams.MidLoopUnitY         = 1;
8415     }
8416     else if (dependencyPattern == dependencyWavefront45XVp9Degree)
8417     {
8418         // Global
8419         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8420         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8421         walkerParams.GlobalStart.x            = 0;
8422         walkerParams.GlobalStart.y            = 0;
8423         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8424         walkerParams.GlobalOutlerLoopStride.y = 0;
8425         walkerParams.GlobalInnerLoopUnit.x    = 0;
8426         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8427 
8428         // Local
8429         walkerParams.BlockResolution.x    = threadSpaceWidth;
8430         walkerParams.BlockResolution.y    = threadSpaceHeight;
8431         walkerParams.LocalStart.x         = 0;
8432         walkerParams.LocalStart.y         = 0;
8433         walkerParams.LocalOutLoopStride.x = 1;
8434         walkerParams.LocalOutLoopStride.y = 0;
8435         walkerParams.LocalInnerLoopUnit.x = -1;
8436         walkerParams.LocalInnerLoopUnit.y = 4;
8437 
8438         // Mid
8439         walkerParams.MiddleLoopExtraSteps = 3;
8440         walkerParams.MidLoopUnitX         = 0;
8441         walkerParams.MidLoopUnitY         = 1;
8442     }
8443     else if (dependencyPattern == dependencyWavefront26ZDegree)
8444     {
8445         // Global
8446         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8447         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8448         walkerParams.GlobalStart.x            = 0;
8449         walkerParams.GlobalStart.y            = 0;
8450         walkerParams.GlobalOutlerLoopStride.x = 2;
8451         walkerParams.GlobalOutlerLoopStride.y = 0;
8452         walkerParams.GlobalInnerLoopUnit.x    = -4;
8453         walkerParams.GlobalInnerLoopUnit.y    = 2;
8454 
8455         // Local
8456         walkerParams.BlockResolution.x    = 2;
8457         walkerParams.BlockResolution.y    = 2;
8458         walkerParams.LocalStart.x         = 0;
8459         walkerParams.LocalStart.y         = 0;
8460         walkerParams.LocalOutLoopStride.x = 0;
8461         walkerParams.LocalOutLoopStride.y = 1;
8462         walkerParams.LocalInnerLoopUnit.x = 1;
8463         walkerParams.LocalInnerLoopUnit.y = 0;
8464 
8465         // Mid
8466         walkerParams.MiddleLoopExtraSteps = 0;
8467         walkerParams.MidLoopUnitX         = 0;
8468         walkerParams.MidLoopUnitY         = 0;
8469     }
8470     else if (dependencyPattern == dependencyWavefront26ZigDegree)
8471     {
8472         int32_t size_x = threadSpaceWidth;   //(threadSpaceWidth + 1)>> 1;
8473         int32_t size_y = threadSpaceHeight;  //threadSpaceHeight << 1;
8474 
8475         // Global
8476         walkerParams.GlobalResolution.x       = size_x;
8477         walkerParams.GlobalResolution.y       = size_y;
8478         walkerParams.GlobalStart.x            = 0;
8479         walkerParams.GlobalStart.y            = 0;
8480         walkerParams.GlobalOutlerLoopStride.x = size_x;
8481         walkerParams.GlobalOutlerLoopStride.y = 0;
8482         walkerParams.GlobalInnerLoopUnit.x    = 0;
8483         walkerParams.GlobalInnerLoopUnit.y    = size_y;
8484 
8485         // Local
8486         walkerParams.BlockResolution.x    = size_x;
8487         walkerParams.BlockResolution.y    = size_y;
8488         walkerParams.LocalStart.x         = 0;
8489         walkerParams.LocalStart.y         = 0;
8490         walkerParams.LocalOutLoopStride.x = 1;
8491         walkerParams.LocalOutLoopStride.y = 0;
8492         walkerParams.LocalInnerLoopUnit.x = -2;
8493         walkerParams.LocalInnerLoopUnit.y = 4;
8494 
8495         // Mid
8496         walkerParams.MiddleLoopExtraSteps = 3;
8497         walkerParams.MidLoopUnitX         = 0;
8498         walkerParams.MidLoopUnitY         = 1;
8499     }
8500     else if (dependencyPattern == dependencyWavefront45DDegree)
8501     {
8502         // Global
8503         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8504         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8505         walkerParams.GlobalStart.x            = 0;
8506         walkerParams.GlobalStart.y            = 0;
8507         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8508         walkerParams.GlobalOutlerLoopStride.y = 0;
8509         walkerParams.GlobalInnerLoopUnit.x    = 0;
8510         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8511 
8512         // Local
8513         walkerParams.BlockResolution.x    = threadSpaceWidth;
8514         walkerParams.BlockResolution.y    = threadSpaceHeight;
8515         walkerParams.LocalStart.x         = threadSpaceWidth;
8516         walkerParams.LocalStart.y         = 0;
8517         walkerParams.LocalOutLoopStride.x = 1;
8518         walkerParams.LocalOutLoopStride.y = 0;
8519         walkerParams.LocalInnerLoopUnit.x = -1;
8520         walkerParams.LocalInnerLoopUnit.y = 1;
8521 
8522         // Mid
8523         walkerParams.MiddleLoopExtraSteps = 0;
8524         walkerParams.MidLoopUnitX         = 0;
8525         walkerParams.MidLoopUnitY         = 0;
8526         if (colorCountMinusOne > 0)
8527         {
8528             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8529         }
8530     }
8531     else if (dependencyPattern == dependencyWavefront26DDegree)
8532     {
8533         // Global
8534         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8535         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8536         walkerParams.GlobalStart.x            = 0;
8537         walkerParams.GlobalStart.y            = 0;
8538         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8539         walkerParams.GlobalOutlerLoopStride.y = 0;
8540         walkerParams.GlobalInnerLoopUnit.x    = 0;
8541         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8542         // Local
8543         walkerParams.BlockResolution.x    = threadSpaceWidth;
8544         walkerParams.BlockResolution.y    = threadSpaceHeight;
8545         walkerParams.LocalStart.x         = threadSpaceWidth;
8546         walkerParams.LocalStart.y         = 0;
8547         walkerParams.LocalOutLoopStride.x = 1;
8548         walkerParams.LocalOutLoopStride.y = 0;
8549         walkerParams.LocalInnerLoopUnit.x = -2;
8550         walkerParams.LocalInnerLoopUnit.y = 1;
8551         // Mid
8552         walkerParams.MiddleLoopExtraSteps = 0;
8553         walkerParams.MidLoopUnitX         = 0;
8554         walkerParams.MidLoopUnitY         = 0;
8555 
8556         if (colorCountMinusOne > 0)
8557         {
8558             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8559         }
8560     }
8561     else if (dependencyPattern == dependencyWavefront45XDDegree)
8562     {
8563         // Global
8564         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8565         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8566         walkerParams.GlobalStart.x            = 0;
8567         walkerParams.GlobalStart.y            = 0;
8568         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8569         walkerParams.GlobalOutlerLoopStride.y = 0;
8570         walkerParams.GlobalInnerLoopUnit.x    = 0;
8571         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8572 
8573         // Local
8574         walkerParams.BlockResolution.x    = threadSpaceWidth;
8575         walkerParams.BlockResolution.y    = threadSpaceHeight;
8576         walkerParams.LocalStart.x         = threadSpaceWidth;
8577         walkerParams.LocalStart.y         = 0;
8578         walkerParams.LocalOutLoopStride.x = 1;
8579         walkerParams.LocalOutLoopStride.y = 0;
8580         walkerParams.LocalInnerLoopUnit.x = -1;
8581         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8582 
8583         // Mid
8584         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8585         walkerParams.MidLoopUnitX         = 0;
8586         walkerParams.MidLoopUnitY         = 1;
8587         if (colorCountMinusOne > 0)
8588         {
8589             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8590         }
8591     }
8592     else if (dependencyPattern == dependencyWavefront26XDDegree)
8593     {
8594         // Global
8595         walkerParams.GlobalResolution.x       = threadSpaceWidth;
8596         walkerParams.GlobalResolution.y       = threadSpaceHeight;
8597         walkerParams.GlobalStart.x            = 0;
8598         walkerParams.GlobalStart.y            = 0;
8599         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8600         walkerParams.GlobalOutlerLoopStride.y = 0;
8601         walkerParams.GlobalInnerLoopUnit.x    = 0;
8602         walkerParams.GlobalInnerLoopUnit.y    = threadSpaceHeight;
8603         // Local
8604         walkerParams.BlockResolution.x    = threadSpaceWidth;
8605         walkerParams.BlockResolution.y    = threadSpaceHeight;
8606         walkerParams.LocalStart.x         = threadSpaceWidth;
8607         walkerParams.LocalStart.y         = 0;
8608         walkerParams.LocalOutLoopStride.x = 1;
8609         walkerParams.LocalOutLoopStride.y = 0;
8610         walkerParams.LocalInnerLoopUnit.x = -2;
8611         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8612         // Mid
8613         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8614         walkerParams.MidLoopUnitX         = 0;
8615         walkerParams.MidLoopUnitY         = 1;
8616 
8617         if (colorCountMinusOne > 0)
8618         {
8619             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8620         }
8621     }
8622     else
8623     {
8624         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported walking pattern is observed\n");
8625         eStatus = MOS_STATUS_INVALID_PARAMETER;
8626     }
8627     return eStatus;
8628 }
8629 
IsDegree45Needed()8630 bool CodechalEncHevcStateG12::IsDegree45Needed()
8631 {
8632     if (m_numberConcurrentGroup == 1 && m_numberEncKernelSubThread == 1)
8633     {
8634         return false;
8635     }
8636     return true;
8637 }
8638 
DecideConcurrentGroupAndWaveFrontNumber()8639 void CodechalEncHevcStateG12::DecideConcurrentGroupAndWaveFrontNumber()
8640 {
8641     uint32_t          shift       = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
8642     uint32_t          widthInLcu  = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1), shift);
8643     uint32_t          heightInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1), shift);
8644     DependencyPattern walkerDegree;
8645 
8646     //As per kernel ULT,for all non TU1 cases m_numberEncKernelSubThread should be set to 1
8647     // LCU32 has no multiple thread support,
8648     if (!m_isMaxLcu64 || m_hevcSeqParams->TargetUsage != 1)
8649     {
8650         m_numberEncKernelSubThread = 1;  // LCU32 has no multiple thread support
8651     }
8652 
8653     while (heightInLcu / m_numberConcurrentGroup == 0)
8654     {
8655         m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
8656         if (m_numberConcurrentGroup == 0)
8657         {
8658             // Try out all values and now have to use the default ones.
8659             // Concurrent group and wave-front split must be enabled together
8660             m_numberConcurrentGroup = 1;
8661             break;
8662         }
8663     }
8664 
8665     if (m_numberConcurrentGroup > 1)
8666     {
8667         m_numWavefrontInOneRegion = 0;
8668         while (m_numWavefrontInOneRegion == 0)
8669         {
8670             uint32_t shift = m_degree45Needed ? 0 : 1;
8671 
8672             m_numWavefrontInOneRegion =
8673                 (widthInLcu + ((heightInLcu - 1) << shift) + m_numberConcurrentGroup - 1) / m_numberConcurrentGroup;
8674 
8675             if (m_numWavefrontInOneRegion > 0)
8676             {
8677                 // this is a valid setting and number of regisions is greater than or equal to 1
8678                 break;
8679             }
8680             m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
8681             if (m_numberConcurrentGroup == 0)
8682             {
8683                 // Try out all values and now have to use the default ones.
8684                 m_numberConcurrentGroup = 1;
8685                 break;
8686             }
8687         }
8688     }
8689     else
8690     {
8691         m_numWavefrontInOneRegion = 0;
8692     }
8693 
8694     m_numberEncKernelSubThread = MOS_MIN(m_numberEncKernelSubThread, m_hevcThreadTaskDataNum);
8695 
8696     return;
8697 }
8698 
InitSwScoreBoardParams(CodechalEncodeSwScoreboard::KernelParams & swScoreboardKernelParames)8699 void CodechalEncHevcStateG12::InitSwScoreBoardParams(CodechalEncodeSwScoreboard::KernelParams &swScoreboardKernelParames)
8700 {
8701     uint32_t widthAlignedMaxLcu;
8702     uint32_t heightAlignedMaxLcu;
8703     uint32_t widthAlignedLcu32;
8704     uint32_t heightAlignedLcu32;
8705 
8706     if (m_mfeEnabled && m_colorBitMfeEnabled)
8707     {
8708         widthAlignedMaxLcu  = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxWidth, MAX_LCU_SIZE);
8709         heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxHeight, MAX_LCU_SIZE);
8710         widthAlignedLcu32   = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxWidth, 32);
8711         heightAlignedLcu32  = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxHeight, 32);
8712     }
8713     else
8714     {
8715         widthAlignedMaxLcu  = m_widthAlignedMaxLcu;
8716         heightAlignedMaxLcu = m_heightAlignedMaxLcu;
8717         widthAlignedLcu32   = m_widthAlignedLcu32;
8718         heightAlignedLcu32  = m_heightAlignedLcu32;
8719     }
8720 
8721     // SW scoreboard Kernel Call -- to be continued - DS + HME kernel call
8722     swScoreboardKernelParames.isHevc = false;  // can be set to false. Need to enabled only for an optimization which is not needed for now
8723 
8724     m_degree45Needed = true;
8725     if (m_hevcSeqParams->TargetUsage == 1)
8726     {
8727         m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU1, m_numberConcurrentGroup);
8728         // m_numberConcurrentGroup should  default to 2 here for TU1. the only other value allowed from reg key will be 1
8729         m_degree45Needed = false;
8730     }
8731     else if (m_hevcSeqParams->TargetUsage == 4)
8732     {
8733         m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU4, m_numberConcurrentGroup);
8734     }
8735     DecideConcurrentGroupAndWaveFrontNumber();
8736 
8737     DependencyPattern walkPattern;
8738     if (m_hevcSeqParams->TargetUsage == 1)
8739     {
8740         if (m_isMaxLcu64)
8741         {
8742             walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26XDegreeAlt : dependencyWavefront26XDDegree;
8743         }
8744         else
8745         {
8746             walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26Degree : dependencyWavefront26DDegree;
8747         }
8748     }
8749     else if (m_hevcSeqParams->TargetUsage == 4)
8750     {
8751         walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront45Degree : dependencyWavefront45DDegree;
8752     }
8753     else
8754     {
8755         walkPattern = dependencyWavefront45DDegree;
8756     }
8757     m_swScoreboardState->SetDependencyPattern(walkPattern);
8758 
8759     if (m_isMaxLcu64)
8760     {
8761         if (m_hevcSeqParams->TargetUsage == 1)
8762         {
8763             swScoreboardKernelParames.scoreboardWidth  = (widthAlignedMaxLcu >> 6);
8764             swScoreboardKernelParames.scoreboardHeight = (heightAlignedMaxLcu >> 6) * m_numberEncKernelSubThread;
8765         }
8766         else
8767         {
8768             swScoreboardKernelParames.scoreboardWidth  = 2 * (widthAlignedMaxLcu >> 6);
8769             swScoreboardKernelParames.scoreboardHeight = 2 * (heightAlignedMaxLcu >> 6);
8770         }
8771         swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
8772         swScoreboardKernelParames.numberOfChildThread    = m_numberEncKernelSubThread - 1;  // child thread number is minus one of the total sub-thread for the main thread takes one.
8773     }
8774     else
8775     {
8776         swScoreboardKernelParames.scoreboardWidth        = widthAlignedLcu32 >> 5;
8777         swScoreboardKernelParames.scoreboardHeight       = heightAlignedLcu32 >> 5;
8778         swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
8779         swScoreboardKernelParames.numberOfChildThread    = 0;
8780     }
8781 
8782     swScoreboardKernelParames.swScoreboardSurfaceWidth  = swScoreboardKernelParames.scoreboardWidth;
8783     swScoreboardKernelParames.swScoreboardSurfaceHeight = swScoreboardKernelParames.scoreboardHeight;
8784 
8785     m_swScoreboardState->SetCurSwScoreboardSurfaceIndex(m_currRecycledBufIdx);
8786 
8787     swScoreboardKernelParames.lcuInfoSurface = &m_lcuLevelInputDataSurface[m_currRecycledBufIdx];
8788 }
8789 
UserFeatureKeyReport()8790 MOS_STATUS CodechalEncHevcStateG12::UserFeatureKeyReport()
8791 {
8792     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8793 
8794     CODECHAL_ENCODE_FUNCTION_ENTER;
8795 
8796     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport());
8797 #if (_DEBUG || _RELEASE_INTERNAL)
8798     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, m_numberConcurrentGroup, m_osInterface->pOsContext);
8799     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID, m_numberEncKernelSubThread, m_osInterface->pOsContext);
8800     CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
8801 
8802     if (m_pakOnlyTest)
8803     {
8804         CodecHalEncode_WriteStringKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID, m_pakOnlyDataFolder, strlen(m_pakOnlyDataFolder), m_osInterface->pOsContext);
8805     }
8806     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
8807     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
8808 #endif
8809 
8810     return eStatus;
8811 }
8812 
SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams * params)8813 MOS_STATUS CodechalEncHevcStateG12::SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams *params)
8814 {
8815     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8816 
8817     MEDIA_WA_TABLE* waTable = m_osInterface->pfnGetWaTable(m_osInterface);
8818     uint32_t memType = (MEDIA_IS_WA(waTable, WaForceAllocateLML4)) ? MOS_MEMPOOL_DEVICEMEMORY : 0;
8819 
8820     if (Mos_ResourceIsNull(&m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource))
8821     {
8822         MOS_ZeroMemory(m_swScoreboardState->GetCurSwScoreboardSurface(), sizeof(*m_swScoreboardState->GetCurSwScoreboardSurface()));
8823 
8824         MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
8825         MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8826         allocParamsForBuffer2D.Type     = MOS_GFXRES_2D;
8827         allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
8828         allocParamsForBuffer2D.Format   = Format_R32U;
8829         allocParamsForBuffer2D.dwWidth  = params->swScoreboardSurfaceWidth;
8830         allocParamsForBuffer2D.dwHeight = params->swScoreboardSurfaceHeight;
8831         allocParamsForBuffer2D.pBufName = "SW Scoreboard Init buffer";
8832         allocParamsForBuffer2D.dwMemType = memType;
8833 
8834         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
8835             m_osInterface,
8836             &allocParamsForBuffer2D,
8837             &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
8838 
8839         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
8840             m_osInterface,
8841             m_swScoreboardState->GetCurSwScoreboardSurface()));
8842     }
8843 
8844     if (m_swScoreboard == nullptr)
8845     {
8846         m_swScoreboard = (uint8_t *)MOS_AllocAndZeroMemory(params->scoreboardWidth * sizeof(uint32_t) * params->scoreboardHeight);
8847         InitSWScoreboard(m_swScoreboard, params->scoreboardWidth, params->scoreboardHeight, m_swScoreboardState->GetDependencyPattern(), (char)(params->numberOfChildThread));
8848     }
8849 
8850     MOS_LOCK_PARAMS lockFlags;
8851 
8852     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8853     lockFlags.WriteOnly = 1;
8854     uint8_t *data       = (uint8_t *)m_osInterface->pfnLockResource(
8855         m_osInterface,
8856         &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource,
8857         &lockFlags);
8858     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8859 
8860     for (uint32_t h = 0; h < params->scoreboardHeight; h++)
8861     {
8862         uint32_t s = params->scoreboardWidth * sizeof(uint32_t);
8863         MOS_SecureMemcpy(data, s, &m_swScoreboard[h * s], s);
8864         data += m_swScoreboardState->GetCurSwScoreboardSurface()->dwPitch;
8865     }
8866 
8867     m_osInterface->pfnUnlockResource(
8868         m_osInterface,
8869         &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
8870 
8871     return eStatus;
8872 }
8873 
SetDependency(uint8_t & numDependencies,char * scoreboardDeltaX,char * scoreboardDeltaY,uint32_t dependencyPattern,char childThreadNumber)8874 void CodechalEncHevcStateG12::SetDependency(
8875     uint8_t &numDependencies,
8876     char *   scoreboardDeltaX,
8877     char *   scoreboardDeltaY,
8878     uint32_t dependencyPattern,
8879     char     childThreadNumber)
8880 {
8881     if (dependencyPattern == dependencyWavefrontHorizontal)
8882     {
8883         numDependencies = m_numDependencyHorizontal;
8884         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyHorizontal, m_dxWavefrontHorizontal, m_numDependencyHorizontal);
8885         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyHorizontal, m_dyWavefrontHorizontal, m_numDependencyHorizontal);
8886     }
8887     else if (dependencyPattern == dependencyWavefrontVertical)
8888     {
8889         numDependencies = m_numDependencyVertical;
8890         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyVertical, m_dxWavefrontVertical, m_numDependencyVertical);
8891         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyVertical, m_dyWavefrontVertical, m_numDependencyVertical);
8892     }
8893     else if (dependencyPattern == dependencyWavefront45Degree)
8894     {
8895         numDependencies = m_numDependency45Degree;
8896         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
8897         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
8898     }
8899     else if (dependencyPattern == dependencyWavefront26Degree ||
8900              dependencyPattern == dependencyWavefront26DDegree)
8901     {
8902         numDependencies = m_numDependency26Degree;
8903         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26Degree, m_dxWavefront26Degree, m_numDependency26Degree);
8904         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26Degree, m_dyWavefront26Degree, m_numDependency26Degree);
8905     }
8906     else if (dependencyPattern == dependencyWavefront45XDegree)
8907     {
8908         numDependencies = m_numDependency45xDegree;
8909         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegree, m_dxWavefront45xDegree, m_numDependency45xDegree);
8910         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegree, m_dyWavefront45xDegree, m_numDependency45xDegree);
8911         numDependencies     = childThreadNumber + 2;
8912         scoreboardDeltaY[0] = childThreadNumber;
8913     }
8914     else if (dependencyPattern == dependencyWavefront26XDegree)
8915     {
8916         numDependencies = m_numDependency26xDegree;
8917         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegree, m_dxWavefront26xDegree, m_numDependency26xDegree);
8918         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegree, m_dyWavefront26xDegree, m_numDependency26xDegree);
8919         numDependencies     = childThreadNumber + 3;
8920         scoreboardDeltaY[0] = childThreadNumber;
8921     }
8922     else if ((dependencyPattern == dependencyWavefront45XDegreeAlt) ||
8923              (dependencyPattern == dependencyWavefront45XDDegree))
8924     {
8925         numDependencies = m_numDependency45xDegreeAlt;
8926         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegreeAlt, m_dxWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
8927         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegreeAlt, m_dyWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
8928         scoreboardDeltaY[0] = childThreadNumber;
8929     }
8930     else if ((dependencyPattern == dependencyWavefront26XDegreeAlt) ||
8931              (dependencyPattern == dependencyWavefront26XDDegree))
8932     {
8933         numDependencies = m_numDependency26xDegreeAlt;
8934         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegreeAlt, m_dxWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
8935         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegreeAlt, m_dyWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
8936         scoreboardDeltaY[0] = childThreadNumber;
8937     }
8938     else if (dependencyPattern == dependencyWavefront45XVp9Degree)
8939     {
8940         numDependencies = m_numDependency45xVp9Degree;
8941         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xVp9Degree, m_dxWavefront45xVp9Degree, m_numDependency45xVp9Degree);
8942         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xVp9Degree, m_dyWavefront45xVp9Degree, m_numDependency45xVp9Degree);
8943     }
8944     else if (dependencyPattern == dependencyWavefront26ZDegree)
8945     {
8946         numDependencies = m_numDependency26zDegree;
8947         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26zDegree, m_dxWavefront26zDegree, m_numDependency26zDegree);
8948         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26zDegree, m_dyWavefront26zDegree, m_numDependency26zDegree);
8949     }
8950     else if (dependencyPattern == dependencyWavefront26ZigDegree)
8951     {
8952         numDependencies = m_numDependency26ZigDegree;
8953         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26ZigDegree, m_dxWavefront26ZigDegree, m_numDependency26ZigDegree);
8954         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26ZigDegree, m_dyWavefront26ZigDegree, m_numDependency26ZigDegree);
8955     }
8956     else if (dependencyPattern == dependencyWavefront45DDegree)
8957     {
8958         numDependencies = m_numDependency45Degree;
8959         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
8960         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
8961     }
8962     else
8963     {
8964         numDependencies = m_numDependencyNone;
8965         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyNone, m_dxWavefrontNone, m_numDependencyNone);
8966         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyNone, m_dyWavefrontNone, m_numDependencyNone);
8967     }
8968 }
8969 
8970 // ========================================================================================
8971 // FUNCTION:        InitSWScoreboard
8972 // DESCRIPTION:        Initialize software scoreboard for a specific dependency pattern.
8973 // INPUTS:            scoreboardWidth - Width of scoreboard in Entries
8974 //                    scoreboardHeight - Height of scoreboard in Entries
8975 //                    dependencyPattern - The Enumeration of the Dependency Pattern
8976 // OUTPUTS:            scoreboard - Pointer to scoreboard in Memory
8977 // ========================================================================================
InitSWScoreboard(uint8_t * scoreboard,uint32_t scoreboardWidth,uint32_t scoreboardHeight,uint32_t dependencyPattern,char childThreadNumber)8978 void CodechalEncHevcStateG12::InitSWScoreboard(uint8_t *scoreboard, uint32_t scoreboardWidth, uint32_t scoreboardHeight, uint32_t dependencyPattern, char childThreadNumber)
8979 {
8980     // 1. Select Dependency Pattern
8981     uint8_t numDependencies = 0;
8982     char    scoreboardDeltaX[m_maxNumDependency];
8983     char    scoreboardDeltaY[m_maxNumDependency];
8984     memset(scoreboardDeltaX, 0, sizeof(scoreboardDeltaX));
8985     memset(scoreboardDeltaY, 0, sizeof(scoreboardDeltaY));
8986 
8987     SetDependency(numDependencies, scoreboardDeltaX, scoreboardDeltaY, dependencyPattern, childThreadNumber);
8988 
8989     // 2. Initialize scoreboard (CPU Based)
8990     int32_t   dependentLocationX = 0;
8991     int32_t   dependentLocationY = 0;
8992     uint32_t *scoreboardInDws    = (uint32_t *)scoreboard;
8993     int32_t   totalThreadNumber  = childThreadNumber + 1;
8994     for (int32_t y = 0; y < (int32_t)scoreboardHeight; y += totalThreadNumber)
8995     {
8996         for (int32_t x = 0; x < (int32_t)scoreboardWidth; x++)
8997         {
8998             scoreboardInDws[y * scoreboardWidth + x] = 0;
8999 
9000             // Add dependencies accordingly
9001             for (int32_t i = 0; i < numDependencies; i++)
9002             {
9003                 dependentLocationX = x + scoreboardDeltaX[i];
9004                 dependentLocationY = y + scoreboardDeltaY[i];
9005                 if ((dependentLocationX < 0) || (dependentLocationY < 0) ||
9006                     (dependentLocationX >= (int32_t)scoreboardWidth) ||
9007                     (dependentLocationY >= (int32_t)scoreboardHeight))
9008                 {
9009                     // Do not add dependency because thread does not exist
9010                 }
9011                 else
9012                 {
9013                     scoreboardInDws[y * scoreboardWidth + x] |= (1 << i);
9014                 }
9015             }  // End NumDep
9016         }      // End x
9017 
9018         for (int32_t n = y + 1; n < y + totalThreadNumber; n++)
9019         {
9020             for (int32_t k = 0; k < (int32_t)scoreboardWidth; k++)
9021             {
9022                 scoreboardInDws[n * scoreboardWidth + k] = scoreboardInDws[y * scoreboardWidth + k];
9023             }
9024         }
9025 
9026     }  // End y
9027 }
9028 
CreateMhwParams()9029 void CodechalEncHevcStateG12::CreateMhwParams()
9030 {
9031     m_sliceStateParams     = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G12);
9032     m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12);
9033     m_pipeBufAddrParams    = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
9034 }
9035 
CalculatePictureStateCommandSize()9036 MOS_STATUS CodechalEncHevcStateG12::CalculatePictureStateCommandSize()
9037 {
9038     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9039 
9040     CODECHAL_ENCODE_FUNCTION_ENTER;
9041 
9042     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
9043     CODECHAL_ENCODE_CHK_STATUS_RETURN(
9044         m_hwInterface->GetHxxStateCommandSize(
9045             CODECHAL_ENCODE_MODE_HEVC,
9046             &m_defaultPictureStatesSize,
9047             &m_defaultPicturePatchListSize,
9048             &stateCmdSizeParams));
9049 
9050     return eStatus;
9051 }
9052 
AddHcpPipeBufAddrCmd(PMOS_COMMAND_BUFFER cmdBuffer)9053 MOS_STATUS CodechalEncHevcStateG12::AddHcpPipeBufAddrCmd(
9054     PMOS_COMMAND_BUFFER cmdBuffer)
9055 {
9056     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9057 
9058     CODECHAL_ENCODE_FUNCTION_ENTER;
9059 
9060     *m_pipeBufAddrParams = {};
9061     SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
9062 #ifdef _MMC_SUPPORTED
9063     m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
9064 #endif
9065     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams));
9066 
9067     return eStatus;
9068 }
9069 
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 * tileCodingParams,uint32_t bitstreamBufSize)9070 MOS_STATUS CodechalEncHevcStateG12::SetTileData(
9071     MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 *tileCodingParams,
9072     uint32_t                              bitstreamBufSize)
9073 {
9074     CODECHAL_ENCODE_FUNCTION_ENTER;
9075 
9076     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9077 
9078     if (!m_hevcPicParams->tiles_enabled_flag)
9079     {
9080         return eStatus;
9081     }
9082 
9083     uint32_t colBd[100]       = {0};
9084     uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
9085     for (uint32_t i = 0; i < num_tile_columns; i++)
9086     {
9087         colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
9088     }
9089 
9090     uint32_t rowBd[100]    = {0};
9091     uint32_t num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
9092     for (uint32_t i = 0; i < num_tile_rows; i++)
9093     {
9094         rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
9095     }
9096 
9097     m_numTiles = num_tile_rows * num_tile_columns;
9098 
9099     uint32_t const uiNumCuRecordTab[]  = {1, 4, 16, 64};  //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
9100     uint32_t       numCuRecord         = uiNumCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
9101     uint32_t       bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
9102     int32_t        frameWidthInMinCb  = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
9103     int32_t        frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
9104     int32_t        shift              = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
9105     uint32_t       NumLCUInPic        = 0;
9106 
9107     for (uint32_t i = 0; i < num_tile_rows; i++)
9108     {
9109         for (uint32_t j = 0; j < num_tile_columns; j++)
9110         {
9111             NumLCUInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
9112         }
9113     }
9114 
9115     uint32_t numSliceInTile = 0;
9116     for (uint32_t uiNumLCUsInTiles = 0, i = 0; i < num_tile_rows; i++)
9117     {
9118         for (uint32_t j = 0; j < num_tile_columns; j++)
9119         {
9120             uint32_t idx          = i * num_tile_columns + j;
9121             uint32_t numLCUInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
9122 
9123             tileCodingParams[idx].TileStartLCUX = colBd[j];
9124             tileCodingParams[idx].TileStartLCUY = rowBd[i];
9125 
9126             tileCodingParams[idx].TileColumnStoreSelect = j % 2;
9127             tileCodingParams[idx].TileRowStoreSelect    = i % 2;
9128 
9129             if (j != num_tile_columns - 1)
9130             {
9131                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
9132                 tileCodingParams[idx].IsLastTileofRow        = false;
9133             }
9134             else
9135             {
9136                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
9137                 tileCodingParams[idx].IsLastTileofRow        = true;
9138             }
9139 
9140             if (i != num_tile_rows - 1)
9141             {
9142                 tileCodingParams[idx].IsLastTileofColumn      = false;
9143                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
9144             }
9145             else
9146             {
9147                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
9148                 tileCodingParams[idx].IsLastTileofColumn      = true;
9149             }
9150 
9151             tileCodingParams[idx].NumOfTilesInFrame       = m_numTiles;
9152             tileCodingParams[idx].NumOfTileColumnsInFrame = num_tile_columns;
9153             tileCodingParams[idx].CuRecordOffset          = MOS_ALIGN_CEIL(((numCuRecord * uiNumLCUsInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
9154                                                        CODECHAL_CACHELINE_SIZE) /
9155                                                    CODECHAL_CACHELINE_SIZE;
9156             tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1;
9157 
9158             tileCodingParams[idx].PakTileStatisticsOffset              = m_sizeOfHcpPakFrameStats * idx / CODECHAL_CACHELINE_SIZE;
9159             tileCodingParams[idx].TileSizeStreamoutOffset              = idx;
9160             tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
9161             tileCodingParams[idx].presHcpSyncBuffer                    = &m_resHcpScalabilitySyncBuffer.sResource;
9162             tileCodingParams[idx].CuLevelStreamoutOffset               = cuLevelStreamoutOffset;
9163             tileCodingParams[idx].SliceSizeStreamoutOffset             = numSliceInTile;
9164             tileCodingParams[idx].SseRowstoreOffset                    = sseRowstoreOffset;
9165             tileCodingParams[idx].BitstreamByteOffset                  = bitstreamByteOffset;
9166             tileCodingParams[idx].SaoRowstoreOffset                    = saoRowstoreOffset;
9167 
9168             cuLevelStreamoutOffset += MOS_ALIGN_CEIL((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
9169             sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
9170             saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
9171             uint64_t totalSizeTemp        = (uint64_t)bitstreamBufSize * (uint64_t)numLCUInTile;
9172             uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)NumLCUInPic) + ((totalSizeTemp % (uint64_t)NumLCUInPic) ? 1 : 0);
9173             bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
9174             uiNumLCUsInTiles += numLCUInTile;
9175 
9176             for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
9177             {
9178                 bool lastSliceInTile = false, sliceInTile = false;
9179                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
9180                     &tileCodingParams[idx],
9181                     &sliceInTile,
9182                     &lastSliceInTile));
9183                 numSliceInTile += (sliceInTile ? 1 : 0);
9184             }
9185         }
9186         // same row store buffer for different tile rows.
9187         saoRowstoreOffset = 0;
9188         sseRowstoreOffset = 0;
9189     }
9190 
9191     return eStatus;
9192 }
9193 
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,bool * sliceInTile,bool * lastSliceInTile)9194 MOS_STATUS CodechalEncHevcStateG12::IsSliceInTile(
9195     uint32_t                              sliceNumber,
9196     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,
9197     bool *                                sliceInTile,
9198     bool *                                lastSliceInTile)
9199 {
9200     CODECHAL_ENCODE_FUNCTION_ENTER;
9201 
9202     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9203 
9204     CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
9205     CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
9206     CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
9207 
9208     uint32_t shift            = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
9209     uint32_t residual         = (1 << shift) - 1;
9210     uint32_t frameWidthInLCU  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
9211     uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
9212 
9213     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
9214     uint32_t                        sliceStartLCU = hevcSlcParams->slice_segment_address;
9215     uint32_t                        sliceLCUx     = sliceStartLCU % frameWidthInLCU;
9216     uint32_t                        sliceLCUy     = sliceStartLCU / frameWidthInLCU;
9217 
9218     uint32_t tile_column_width = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
9219     uint32_t tile_row_height   = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
9220     if (sliceLCUx < currentTile->TileStartLCUX ||
9221         sliceLCUy < currentTile->TileStartLCUY ||
9222         sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
9223         sliceLCUy >= currentTile->TileStartLCUY + tile_row_height)
9224     {
9225         // slice start is not in the tile boundary
9226         *lastSliceInTile = *sliceInTile = false;
9227         return eStatus;
9228     }
9229 
9230     sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tile_column_width;
9231     sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tile_column_width;
9232 
9233     if (sliceLCUx >= currentTile->TileStartLCUX + tile_column_width)
9234     {
9235         sliceLCUx -= tile_column_width;
9236         sliceLCUy++;
9237     }
9238 
9239     if (sliceLCUx < currentTile->TileStartLCUX ||
9240         sliceLCUy < currentTile->TileStartLCUY ||
9241         sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
9242         sliceLCUy >= currentTile->TileStartLCUY + tile_row_height)
9243     {
9244         // last LCU of the slice is out of the tile boundary
9245         *lastSliceInTile = *sliceInTile = false;
9246         return eStatus;
9247     }
9248 
9249     *sliceInTile = true;
9250 
9251     sliceLCUx++;
9252     sliceLCUy++;
9253 
9254     // the end of slice is at the boundary of tile
9255     *lastSliceInTile = (sliceLCUx == currentTile->TileStartLCUX + tile_column_width &&
9256                         sliceLCUy == currentTile->TileStartLCUY + tile_row_height);
9257 
9258     return eStatus;
9259 }
9260 
AddHcpRefIdxCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)9261 MOS_STATUS CodechalEncHevcStateG12::AddHcpRefIdxCmd(
9262     PMOS_COMMAND_BUFFER         cmdBuffer,
9263     PMHW_BATCH_BUFFER           batchBuffer,
9264     PMHW_VDBOX_HEVC_SLICE_STATE params)
9265 {
9266     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9267 
9268     CODECHAL_ENCODE_FUNCTION_ENTER;
9269 
9270     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
9271     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
9272     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
9273 
9274     if (cmdBuffer == nullptr && batchBuffer == nullptr)
9275     {
9276         CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
9277         return MOS_STATUS_NULL_POINTER;
9278     }
9279 
9280     PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
9281     PCODEC_HEVC_ENCODE_SLICE_PARAMS   hevcSlcParams = params->pEncodeHevcSliceParams;
9282 
9283     if (hevcSlcParams->slice_type != CODECHAL_ENCODE_HEVC_I_SLICE)
9284     {
9285         MHW_VDBOX_HEVC_REF_IDX_PARAMS_G12 refIdxParams;
9286 
9287         refIdxParams.CurrPic         = hevcPicParams->CurrReconstructedPic;
9288         refIdxParams.isEncode        = true;
9289         refIdxParams.ucList          = LIST_0;
9290         refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l0_active_minus1 + 1;
9291         eStatus                      = MOS_SecureMemcpy(&refIdxParams.RefPicList, sizeof(refIdxParams.RefPicList), &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList));
9292         if (eStatus != MOS_STATUS_SUCCESS)
9293         {
9294             CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
9295             return eStatus;
9296         }
9297 
9298         refIdxParams.hevcRefList  = (void **)m_refList;
9299         refIdxParams.poc_curr_pic = hevcPicParams->CurrPicOrderCnt;
9300         for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
9301         {
9302             refIdxParams.poc_list[i] = hevcPicParams->RefFramePOCList[i];
9303         }
9304 
9305         refIdxParams.pRefIdxMapping     = params->pRefIdxMapping;
9306         refIdxParams.RefFieldPicFlag    = 0;  // there is no interlaced support in encoder
9307         refIdxParams.RefBottomFieldFlag = 0;  // there is no interlaced support in encoder
9308 
9309         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
9310 
9311         if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
9312         {
9313             refIdxParams.ucList          = LIST_1;
9314             refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l1_active_minus1 + 1;
9315             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
9316         }
9317     }
9318 
9319     return eStatus;
9320 }
9321 
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)9322 MOS_STATUS CodechalEncHevcStateG12::SendPrologWithFrameTracking(
9323     PMOS_COMMAND_BUFFER   cmdBuffer,
9324     bool                  frameTrackingRequested,
9325     MHW_MI_MMIOREGISTERS *mmioRegister)
9326 {
9327     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9328 
9329     MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
9330 
9331     MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
9332     MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
9333     forceWakeupParams.bMFXPowerWellControl      = false;
9334     forceWakeupParams.bMFXPowerWellControlMask  = true;
9335     forceWakeupParams.bHEVCPowerWellControl     = true;
9336     forceWakeupParams.bHEVCPowerWellControlMask = true;
9337     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
9338         cmdBuffer,
9339         &forceWakeupParams));
9340 
9341     if (UseRenderCommandBuffer())
9342     {
9343         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
9344         return eStatus;
9345     }
9346 
9347 #ifdef _MMC_SUPPORTED
9348     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
9349     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
9350 #endif
9351 
9352     if (!IsLastPipe())
9353     {
9354         return eStatus;
9355     }
9356 
9357     PMOS_COMMAND_BUFFER commandBufferInUse;
9358     if (m_realCmdBuffer.pCmdBase)
9359     {
9360         commandBufferInUse = &m_realCmdBuffer;
9361     }
9362     else if (cmdBuffer && cmdBuffer->pCmdBase)
9363     {
9364         commandBufferInUse = cmdBuffer;
9365     }
9366     else
9367     {
9368         eStatus = MOS_STATUS_INVALID_PARAMETER;
9369         return eStatus;
9370     }
9371 
9372     // initialize command buffer attributes
9373     commandBufferInUse->Attributes.bTurboMode               = m_hwInterface->m_turboMode;
9374     commandBufferInUse->Attributes.dwNumRequestedEUSlices   = m_hwInterface->m_numRequestedEuSlices;
9375     commandBufferInUse->Attributes.dwNumRequestedSubSlices  = m_hwInterface->m_numRequestedSubSlices;
9376     commandBufferInUse->Attributes.dwNumRequestedEUs        = m_hwInterface->m_numRequestedEus;
9377     commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
9378 
9379     if (frameTrackingRequested && m_frameTrackingEnabled)
9380     {
9381         commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
9382         commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
9383             &m_encodeStatusBuf.resStatusBuffer;
9384         commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
9385         // Set media frame tracking address offset(the offset from the encoder status buffer page)
9386         commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
9387     }
9388 
9389     MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
9390     MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
9391     genericPrologParams.pOsInterface     = m_hwInterface->GetOsInterface();
9392     genericPrologParams.pvMiInterface    = m_hwInterface->GetMiInterface();
9393     genericPrologParams.bMmcEnabled      = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
9394     genericPrologParams.dwStoreDataValue = m_storeData - 1;
9395 
9396     CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
9397 
9398     return eStatus;
9399 }
9400 
ResizeOnResChange()9401 void CodechalEncHevcStateG12::ResizeOnResChange()
9402 {
9403     CODECHAL_ENCODE_FUNCTION_ENTER;
9404 
9405     CodechalEncoderState::ResizeOnResChange();
9406 
9407     // need to re-allocate surfaces according to resolution
9408     m_swScoreboardState->ReleaseResources();
9409 }
9410 
InitMmcState()9411 MOS_STATUS CodechalEncHevcStateG12::InitMmcState()
9412 {
9413     CODECHAL_ENCODE_FUNCTION_ENTER;
9414 #ifdef _MMC_SUPPORTED
9415     m_mmcState = MOS_New(CodechalMmcEncodeHevcG12, m_hwInterface, this);
9416     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
9417 #endif
9418     return MOS_STATUS_SUCCESS;
9419 }
9420 
9421 #if USE_CODECHAL_DEBUG_TOOL
9422 
9423 //MOS_STATUS CodechalEncHevcStateG12::CodecHal_DbgDumpHEVCMbEncCurbeG12(
9424 //    CodechalDebugInterface         *pDebugInterface,
9425 //    CODECHAL_MEDIA_STATE_TYPE       Function,
9426 //    PMOS_RESOURCE                   presDBuffer)
9427 //{
9428 //#define WRITE_CURBE_FIELD_TO_FILE(field) {\
9429 //    MOS_SecureStringPrint(sOutBuf, sizeof(sOutBuf), sizeof(sOutBuf), "field = %d\n", pCurbeData->field);\
9430 //    CodecHal_DbgAddStringToBufferNewLine(&FileParams, sOutBuf);}
9431 //
9432 //    PMOS_INTERFACE              m_osInterface = nullptr;
9433 //    PCCHAR                      pcFunction = nullptr;
9434 //    char                        sAttrib[125];
9435 //    char                        sOutBuf[MAX_FIELD_LENGTH];
9436 //    CODECHAL_DBG_FILE_PARAMS    FileParams;
9437 //    MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
9438 //    MOS_LOCK_PARAMS             LockFlags;
9439 //    CodechalEncHevcStateG12::MBENC_COMBINED_BUFFER1 *pEncComBuf1 = nullptr;
9440 //
9441 //    CODECHAL_DEBUG_FUNCTION_ENTER;
9442 //
9443 //    CODECHAL_DEBUG_CHK_NULL(pDebugInterface);
9444 //    CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pOsInterface);
9445 //    CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pHwInterface);
9446 //    m_osInterface = pDebugInterface->pOsInterface;
9447 //
9448 //    pcFunction = CodecHal_DbgGetFunctionType(
9449 //        pDebugInterface, Function, DBG_CMD_BUFFER_DUMP_DEFAULT);
9450 //    CODECHAL_DEBUG_CHK_NULL(pcFunction);
9451 //
9452 //    MOS_SecureStringPrint(sAttrib, sizeof(sAttrib), sizeof(sAttrib), "%s%s", pcFunction, CODECHAL_DBG_STRING_CURBE);
9453 //
9454 //    MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
9455 //    LockFlags.ReadOnly = 1;
9456 //
9457 //    pEncComBuf1 = (CodechalEncHevcStateG12::MBENC_COMBINED_BUFFER1*)m_osInterface->pfnLockResource(
9458 //        m_osInterface,
9459 //        presDBuffer,
9460 //        &LockFlags);
9461 //
9462 //    FileParams = g_cInitDbgFileParams;
9463 //
9464 //    if (!CodecHal_DbgAttribIsEnabled(pDebugInterface, sAttrib))
9465 //    {
9466 //        return eStatus;
9467 //    }
9468 //
9469 //    MOS_ZeroMemory(pDebugInterface->sPath, sizeof(pDebugInterface->sPath));
9470 //
9471 //    CODECHAL_DEBUG_CHK_STATUS(CodecHal_DbgConstructFilenameString(
9472 //        pDebugInterface,
9473 //        pcFunction,
9474 //        CODECHAL_DBG_STRING_CURBE,
9475 //        CODECHAL_DBG_STRING_TXT));
9476 //
9477 //    if (CodecHal_DbgAttribIsEnabled(pDebugInterface, CODECHAL_DBG_STRING_DUMPDATAINBINARY))
9478 //    {
9479 //        CODECHAL_DEBUG_CHK_STATUS(CodecHal_DbgDumpBufferInHexDwords(
9480 //            pDebugInterface,
9481 //            (uint8_t*)&pEncComBuf1->Curbe,
9482 //            sizeof(pEncComBuf1->Curbe)));
9483 //    }
9484 //    else
9485 //    {
9486 //        CodechalEncHevcStateG12::MBENC_CURBE* pCurbeData = &pEncComBuf1->Curbe;
9487 //
9488 //        FileParams.lRemaining = sizeof(char)* MAX_FIELD_LENGTH * MAX_NUM_ATTRIBUTES;
9489 //        FileParams.psWriteToFile = (char*)MOS_AllocAndZeroMemory(FileParams.lRemaining);
9490 //        CODECHAL_DEBUG_CHK_NULL(FileParams.psWriteToFile);
9491 //        FileParams.dwOffset = 0;
9492 //
9493 //        memset(sOutBuf, 0, sizeof(sOutBuf));
9494 //
9495 //        MOS_SecureStringPrint(sOutBuf, sizeof(sOutBuf), sizeof(sOutBuf), "# CURBE Parameters:");
9496 //        CodecHal_DbgAddStringToBufferNewLine(&FileParams, sOutBuf);
9497 //
9498 //        WRITE_CURBE_FIELD_TO_FILE(FrameWidthInSamples);
9499 //        WRITE_CURBE_FIELD_TO_FILE(FrameHeightInSamples);
9500 //
9501 //        WRITE_CURBE_FIELD_TO_FILE(Log2MaxCUSize);
9502 //        WRITE_CURBE_FIELD_TO_FILE(Log2MinCUSize);
9503 //        WRITE_CURBE_FIELD_TO_FILE(Log2MaxTUSize);
9504 //        WRITE_CURBE_FIELD_TO_FILE(Log2MinTUSize);
9505 //        WRITE_CURBE_FIELD_TO_FILE(MaxIntraRdeIter);
9506 //        WRITE_CURBE_FIELD_TO_FILE(QPType);
9507 //        WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthInter);
9508 //        WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthIntra);
9509 //        WRITE_CURBE_FIELD_TO_FILE(Log2ParallelMergeLevel);
9510 //
9511 //        WRITE_CURBE_FIELD_TO_FILE(CornerNeighborPixel);
9512 //        WRITE_CURBE_FIELD_TO_FILE(IntraNeighborAvailFlags);
9513 //        WRITE_CURBE_FIELD_TO_FILE(ChromaFormatType);
9514 //        WRITE_CURBE_FIELD_TO_FILE(SubPelMode);
9515 //        WRITE_CURBE_FIELD_TO_FILE(InterSADMeasure);
9516 //        WRITE_CURBE_FIELD_TO_FILE(IntraSADMeasure);
9517 //        WRITE_CURBE_FIELD_TO_FILE(IntraPrediction);
9518 //        WRITE_CURBE_FIELD_TO_FILE(RefIDCostMode);
9519 //        WRITE_CURBE_FIELD_TO_FILE(TUBasedCostSetting);
9520 //
9521 //        WRITE_CURBE_FIELD_TO_FILE(ExplictModeEn);
9522 //        WRITE_CURBE_FIELD_TO_FILE(AdaptiveEn);
9523 //        WRITE_CURBE_FIELD_TO_FILE(EarlyImeSuccessEn);
9524 //        WRITE_CURBE_FIELD_TO_FILE(IntraSpeedMode);
9525 //        WRITE_CURBE_FIELD_TO_FILE(IMECostCentersSel);
9526 //        WRITE_CURBE_FIELD_TO_FILE(RDEQuantRoundValue);
9527 //        WRITE_CURBE_FIELD_TO_FILE(IMERefWindowSize);
9528 //        WRITE_CURBE_FIELD_TO_FILE(IntraComputeType);
9529 //        WRITE_CURBE_FIELD_TO_FILE(Depth0IntraPredition);
9530 //        WRITE_CURBE_FIELD_TO_FILE(TUDepthControl);
9531 //        WRITE_CURBE_FIELD_TO_FILE(IntraTuRecFeedbackDisable);
9532 //        WRITE_CURBE_FIELD_TO_FILE(MergeListBiDisable);
9533 //        WRITE_CURBE_FIELD_TO_FILE(EarlyImeStop);
9534 //
9535 //        WRITE_CURBE_FIELD_TO_FILE(FrameQP);
9536 //        WRITE_CURBE_FIELD_TO_FILE(FrameQPSign);
9537 //        WRITE_CURBE_FIELD_TO_FILE(ConcurrentGroupNum);
9538 //        WRITE_CURBE_FIELD_TO_FILE(NumofUnitInWaveFront);
9539 //
9540 //        WRITE_CURBE_FIELD_TO_FILE(LoadBalenceEnable);
9541 //        WRITE_CURBE_FIELD_TO_FILE(NumberofMultiFrame);
9542 //        WRITE_CURBE_FIELD_TO_FILE(Degree45);
9543 //        WRITE_CURBE_FIELD_TO_FILE(Break12Dependency);
9544 //        WRITE_CURBE_FIELD_TO_FILE(ThreadNumber);
9545 //
9546 //        WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_B);
9547 //        WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_P);
9548 //        WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_I);
9549 //
9550 //        WRITE_CURBE_FIELD_TO_FILE(NumofRowTile);
9551 //        WRITE_CURBE_FIELD_TO_FILE(NumofColumnTile);
9552 //
9553 //        WRITE_CURBE_FIELD_TO_FILE(TransquantBypassEnableFlag);
9554 //        WRITE_CURBE_FIELD_TO_FILE(PCMEnabledFlag);
9555 //        WRITE_CURBE_FIELD_TO_FILE(CuQpDeltaEnabledFlag);
9556 //        WRITE_CURBE_FIELD_TO_FILE(Stepping);
9557 //        WRITE_CURBE_FIELD_TO_FILE(WaveFrontSplitsEnable);
9558 //        WRITE_CURBE_FIELD_TO_FILE(HMEFlag);
9559 //        WRITE_CURBE_FIELD_TO_FILE(SuperHME);
9560 //        WRITE_CURBE_FIELD_TO_FILE(UltraHME);
9561 //        WRITE_CURBE_FIELD_TO_FILE(Cu64SkipCheckOnly);
9562 //        WRITE_CURBE_FIELD_TO_FILE(EnableCu64Check);
9563 //        WRITE_CURBE_FIELD_TO_FILE(Cu642Nx2NCheckOnly);
9564 //        WRITE_CURBE_FIELD_TO_FILE(EnableCu64AmpCheck);
9565 //        WRITE_CURBE_FIELD_TO_FILE(DisablePIntra);
9566 //        WRITE_CURBE_FIELD_TO_FILE(DisableIntraTURec);
9567 //        WRITE_CURBE_FIELD_TO_FILE(InheritIntraModeFromTU0);
9568 //        WRITE_CURBE_FIELD_TO_FILE(CostScalingForRA);
9569 //        WRITE_CURBE_FIELD_TO_FILE(DisableIntraNxN);
9570 //
9571 //        WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL0);
9572 //        WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL1);
9573 //        WRITE_CURBE_FIELD_TO_FILE(MaxBRefIdxL0);
9574 //
9575 //        WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermination);
9576 //        WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermSize);
9577 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Enable);
9578 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Order);
9579 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Th);
9580 //        WRITE_CURBE_FIELD_TO_FILE(DynamicOrderTh);
9581 //        WRITE_CURBE_FIELD_TO_FILE(PerBFrameQPOffset);
9582 //        WRITE_CURBE_FIELD_TO_FILE(IncreaseExitThresh);
9583 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Min32);
9584 //        WRITE_CURBE_FIELD_TO_FILE(LastFrameIsIntra);
9585 //
9586 //        WRITE_CURBE_FIELD_TO_FILE(LenSP);
9587 //        WRITE_CURBE_FIELD_TO_FILE(MaxNumSU);
9588 //
9589 //        WRITE_CURBE_FIELD_TO_FILE(CostTableIndex);
9590 //
9591 //        WRITE_CURBE_FIELD_TO_FILE(SliceType);
9592 //        WRITE_CURBE_FIELD_TO_FILE(TemporalMvpEnableFlag);
9593 //        WRITE_CURBE_FIELD_TO_FILE(CollocatedFromL0Flag);
9594 //        WRITE_CURBE_FIELD_TO_FILE(theSameRefList);
9595 //        WRITE_CURBE_FIELD_TO_FILE(IsLowDelay);
9596 //        WRITE_CURBE_FIELD_TO_FILE(MaxNumMergeCand);
9597 //        WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL0);
9598 //        WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL1);
9599 //
9600 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_0);
9601 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_0);
9602 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_1);
9603 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_1);
9604 //
9605 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_2);
9606 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_2);
9607 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_3);
9608 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_3);
9609 //
9610 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_4);
9611 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_4);
9612 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_5);
9613 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_5);
9614 //
9615 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_6);
9616 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_6);
9617 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_7);
9618 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_7);
9619 //
9620 //        WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L0);
9621 //        WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L1);
9622 //
9623 //        WRITE_CURBE_FIELD_TO_FILE(RefFrameWinWidth);
9624 //        WRITE_CURBE_FIELD_TO_FILE(RefFrameWinHeight);
9625 //
9626 //        WRITE_CURBE_FIELD_TO_FILE(RoundingInter);
9627 //        WRITE_CURBE_FIELD_TO_FILE(RoundingIntra);
9628 //        WRITE_CURBE_FIELD_TO_FILE(MaxThreadWidth);
9629 //        WRITE_CURBE_FIELD_TO_FILE(MaxThreadHeight);
9630 //
9631 //        CODECHAL_DEBUG_CHK_STATUS(MosUtilities::MosWriteFileFromPtr(
9632 //            pDebugInterface->sPath,
9633 //            FileParams.psWriteToFile,
9634 //            FileParams.dwOffset));
9635 //    }
9636 //
9637 //finish:
9638 //    if (m_osInterface && pEncComBuf1)
9639 //    {
9640 //        m_osInterface->pfnUnlockResource(
9641 //            m_osInterface,
9642 //            presDBuffer);
9643 //    }
9644 //
9645 //    if (FileParams.psWriteToFile)
9646 //    {
9647 //        MOS_FreeMemory(FileParams.psWriteToFile);
9648 //    }
9649 //    return eStatus;
9650 //}
9651 
9652 #endif
VerifyCommandBufferSize()9653 MOS_STATUS CodechalEncHevcStateG12::VerifyCommandBufferSize()
9654 {
9655     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9656 
9657     CODECHAL_ENCODE_FUNCTION_ENTER;
9658 
9659     if (UseRenderCommandBuffer() || m_numPipe == 1)
9660     {
9661         // legacy mode & resize CommandBuffer Size for every BRC pass
9662         if (!m_singleTaskPhaseSupported)
9663         {
9664             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
9665         }
9666         return eStatus;
9667     }
9668 
9669     // virtual engine
9670     uint32_t requestedSize =
9671         m_pictureStatesSize +
9672         m_extraPictureStatesSize +
9673         (m_sliceStatesSize * m_numSlices);
9674 
9675     requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
9676 
9677     // Running in the multiple VDBOX mode
9678     int currentPipe = GetCurrentPipe();
9679     if (currentPipe < 0 || currentPipe >= m_numPipe)
9680     {
9681         eStatus = MOS_STATUS_INVALID_PARAMETER;
9682         return eStatus;
9683     }
9684     int currentPass = GetCurrentPass();
9685     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9686     {
9687         eStatus = MOS_STATUS_INVALID_PARAMETER;
9688         return eStatus;
9689     }
9690 
9691     if (IsFirstPipe() && m_osInterface->bUsesPatchList)
9692     {
9693         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
9694     }
9695 
9696     PMOS_COMMAND_BUFFER pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
9697 
9698     if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) ||
9699         m_sizeOfVeBatchBuffer < requestedSize)
9700     {
9701         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
9702 
9703         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9704         allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
9705         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
9706         allocParamsForBufferLinear.Format   = Format_Buffer;
9707         allocParamsForBufferLinear.dwBytes  = requestedSize;
9708         allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
9709 
9710         if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource))
9711         {
9712             if (pCmdBuffer->pCmdBase)
9713             {
9714                 m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource);
9715             }
9716             m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource);
9717         }
9718 
9719         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
9720             m_osInterface,
9721             &allocParamsForBufferLinear,
9722             &pCmdBuffer->OsResource));
9723 
9724         m_sizeOfVeBatchBuffer = requestedSize;
9725     }
9726 
9727     if (pCmdBuffer->pCmdBase == nullptr)
9728     {
9729         MOS_LOCK_PARAMS lockParams;
9730         MOS_ZeroMemory(&lockParams, sizeof(lockParams));
9731         lockParams.WriteOnly = true;
9732         pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams);
9733         pCmdBuffer->iRemaining                     = m_sizeOfVeBatchBuffer;
9734         pCmdBuffer->iOffset                        = 0;
9735 
9736         if (pCmdBuffer->pCmdBase == nullptr)
9737         {
9738             eStatus = MOS_STATUS_NULL_POINTER;
9739             return eStatus;
9740         }
9741     }
9742 
9743     return eStatus;
9744 }
9745 
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)9746 MOS_STATUS CodechalEncHevcStateG12::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
9747 {
9748     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9749 
9750     CODECHAL_ENCODE_FUNCTION_ENTER;
9751 
9752     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9753     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
9754 
9755     if (UseRenderCommandBuffer() || m_numPipe == 1)
9756     {
9757         // legacy mode
9758         m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
9759         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
9760         return eStatus;
9761     }
9762 
9763     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
9764 
9765     int currentPipe = GetCurrentPipe();
9766     if (currentPipe < 0 || currentPipe >= m_numPipe)
9767     {
9768         eStatus = MOS_STATUS_INVALID_PARAMETER;
9769         return eStatus;
9770     }
9771     int currentPass = GetCurrentPass();
9772     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9773     {
9774         eStatus = MOS_STATUS_INVALID_PARAMETER;
9775         return eStatus;
9776     }
9777 
9778     *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
9779 
9780     if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
9781     {
9782         // Insert CP Prolog
9783         CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
9784         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
9785     }
9786     return eStatus;
9787 }
9788 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)9789 MOS_STATUS CodechalEncHevcStateG12::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
9790 {
9791     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9792 
9793     CODECHAL_ENCODE_FUNCTION_ENTER;
9794 
9795     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9796 
9797     if (UseRenderCommandBuffer() || m_numPipe == 1)
9798     {
9799         // legacy mode
9800         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
9801         return eStatus;
9802     }
9803 
9804     int currentPipe = GetCurrentPipe();
9805     if (currentPipe < 0 || currentPipe >= m_numPipe)
9806     {
9807         eStatus = MOS_STATUS_INVALID_PARAMETER;
9808         return eStatus;
9809     }
9810     int currentPass = GetCurrentPass();
9811     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9812     {
9813         eStatus = MOS_STATUS_INVALID_PARAMETER;
9814         return eStatus;
9815     }
9816     uint8_t passIndex                                               = m_singleTaskPhaseSupported ? 0 : currentPass;
9817     m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
9818     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
9819 
9820     return eStatus;
9821 }
9822 
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)9823 MOS_STATUS CodechalEncHevcStateG12::SubmitCommandBuffer(
9824     PMOS_COMMAND_BUFFER cmdBuffer,
9825     bool                bNullRendering)
9826 {
9827     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9828 
9829     CODECHAL_ENCODE_FUNCTION_ENTER;
9830 
9831     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9832 
9833     if (UseRenderCommandBuffer() || m_numPipe == 1)
9834     {
9835         // legacy mode
9836         if (!UseRenderCommandBuffer())  // Set VE Hints for video contexts only
9837         {
9838             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
9839         }
9840 
9841         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
9842         return eStatus;
9843     }
9844 
9845     bool cmdBufferReadyForSubmit = IsLastPipe();
9846 
9847     // In STF, Hold the command buffer submission till last pass
9848     if (m_singleTaskPhaseSupported)
9849     {
9850         cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
9851     }
9852 
9853     if (!cmdBufferReadyForSubmit)
9854     {
9855         return eStatus;
9856     }
9857 
9858     int currentPass = GetCurrentPass();
9859     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9860     {
9861         eStatus = MOS_STATUS_INVALID_PARAMETER;
9862         return eStatus;
9863     }
9864     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
9865 
9866     for (uint32_t i = 0; i < m_numPipe; i++)
9867     {
9868         PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
9869 
9870         if (cmdBuffer->pCmdBase)
9871         {
9872             m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
9873         }
9874 
9875         cmdBuffer->pCmdBase = 0;
9876         cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
9877     }
9878     m_sizeOfVeBatchBuffer = 0;
9879 
9880     if (eStatus == MOS_STATUS_SUCCESS)
9881     {
9882         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
9883         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
9884     }
9885 
9886     return eStatus;
9887 }
9888 
SetSliceStructs()9889 MOS_STATUS CodechalEncHevcStateG12::SetSliceStructs()
9890 {
9891     MOS_STATUS eStatus   = MOS_STATUS_SUCCESS;
9892     eStatus              = CodechalEncodeHevcBase::SetSliceStructs();
9893     m_numPassesInOnePipe = m_numPasses;
9894     m_numPasses          = (m_numPasses + 1) * m_numPipe - 1;
9895     return eStatus;
9896 }
9897 
AllocateTileStatistics()9898 MOS_STATUS CodechalEncHevcStateG12::AllocateTileStatistics()
9899 {
9900     CODECHAL_ENCODE_FUNCTION_ENTER;
9901 
9902     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9903 
9904     if (!m_hevcPicParams->tiles_enabled_flag)
9905     {
9906         return eStatus;
9907     }
9908 
9909     auto num_tile_rows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
9910     auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
9911     auto num_tiles        = num_tile_rows * num_tile_columns;
9912 
9913     MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
9914     MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
9915     MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
9916 
9917     MOS_LOCK_PARAMS lockFlagsWriteOnly;
9918     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
9919     lockFlagsWriteOnly.WriteOnly = true;
9920 
9921     // Set the maximum size based on frame level statistics.
9922     m_hevcStatsSize.uiTileSizeRecord     = CODECHAL_CACHELINE_SIZE;
9923     m_hevcStatsSize.uiHevcPakStatistics  = m_sizeOfHcpPakFrameStats;
9924     m_hevcStatsSize.uiVdencStatistics    = 0;
9925     m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
9926 
9927     // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
9928     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
9929     m_hevcFrameStatsOffset.uiTileSizeRecord     = 0;  // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
9930     m_hevcFrameStatsOffset.uiHevcPakStatistics  = 0;
9931     m_hevcFrameStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
9932     m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
9933 
9934     // Frame level statistics
9935     m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6), CODECHAL_PAGE_SIZE);
9936 
9937     // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
9938     if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
9939     {
9940         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
9941         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9942         allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
9943         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
9944         allocParamsForBufferLinear.Format   = Format_Buffer;
9945         allocParamsForBufferLinear.dwBytes  = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
9946         allocParamsForBufferLinear.pBufName = "GEN11 HCP Aggregated Frame Statistics Streamout Buffer";
9947 
9948         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
9949             m_osInterface,
9950             &allocParamsForBufferLinear,
9951             &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
9952         m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
9953 
9954         uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
9955             m_osInterface,
9956             &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
9957             &lockFlagsWriteOnly);
9958 
9959         CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
9960         MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
9961         m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
9962     }
9963 
9964     // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
9965     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
9966     m_hevcTileStatsOffset.uiTileSizeRecord     = 0;  // TileReord is in a separated resource
9967     m_hevcTileStatsOffset.uiHevcPakStatistics  = 0;  // PakStaticstics is head of m_resTileBasedStatisticsBuffer
9968     m_hevcTileStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
9969     m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
9970     // Combined statistics size for all tiles
9971     m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6, CODECHAL_PAGE_SIZE);
9972 
9973     // Tile size record size for all tiles
9974     m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
9975 
9976     if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
9977     {
9978         if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
9979         {
9980             m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
9981         }
9982         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
9983         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9984         allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
9985         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
9986         allocParamsForBufferLinear.Format   = Format_Buffer;
9987         allocParamsForBufferLinear.dwBytes  = m_hwInterface->m_pakIntTileStatsSize;
9988         allocParamsForBufferLinear.pBufName = "GEN11 HCP Tile Level Statistics Streamout Buffer";
9989 
9990         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
9991             m_osInterface,
9992             &allocParamsForBufferLinear,
9993             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
9994         m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
9995 
9996         uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
9997             m_osInterface,
9998             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
9999             &lockFlagsWriteOnly);
10000         CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
10001 
10002         MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
10003         m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
10004     }
10005 
10006     if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
10007     {
10008         if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
10009         {
10010             m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
10011         }
10012         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
10013         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
10014         allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
10015         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
10016         allocParamsForBufferLinear.Format   = Format_Buffer;
10017         allocParamsForBufferLinear.dwBytes  = m_hwInterface->m_tileRecordSize;
10018         allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
10019 
10020         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
10021             m_osInterface,
10022             &allocParamsForBufferLinear,
10023             &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource));
10024         m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize;
10025 
10026         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
10027             m_osInterface,
10028             &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
10029             &lockFlagsWriteOnly);
10030         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
10031 
10032         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
10033         m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
10034     }
10035 
10036     return eStatus;
10037 }
10038 
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)10039 void CodechalEncHevcStateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS &pipeBufAddrParams)
10040 {
10041     CODECHAL_ENCODE_FUNCTION_ENTER;
10042 
10043     CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
10044 
10045     // SAO Row Store is GEN12 specific
10046     pipeBufAddrParams.presSaoRowStoreBuffer = &m_SAORowStoreBuffer;
10047 
10048     PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
10049     if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
10050     {
10051         pipeBufAddrParams.presLcuBaseAddressBuffer     = &tileStatisticsBuffer->sResource;
10052         pipeBufAddrParams.dwLcuStreamOutOffset         = m_hevcTileStatsOffset.uiHevcSliceStreamout;
10053         pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
10054         pipeBufAddrParams.dwFrameStatStreamOutOffset   = m_hevcTileStatsOffset.uiHevcPakStatistics;
10055     }
10056 }
10057 
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)10058 MOS_STATUS CodechalEncHevcStateG12::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
10059 {
10060     CODECHAL_ENCODE_FUNCTION_ENTER;
10061 
10062     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10063 
10064     if (!m_sseEnabled)
10065     {
10066         return eStatus;
10067     }
10068 
10069     // encodeStatus is offset by 2 DWs in the resource
10070     uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
10071     for (auto i = 0; i < 6; i++)  // 64 bit SSE values for luma/ chroma channels need to be copied
10072     {
10073         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
10074         MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
10075         miCpyMemMemParams.presSrc     = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
10076         miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t);  // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
10077         miCpyMemMemParams.presDst     = &m_encodeStatusBuf.resStatusBuffer;
10078         miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
10079         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
10080     }
10081     return eStatus;
10082 }
10083 
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)10084 void CodechalEncHevcStateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS &indObjBaseAddrParams)
10085 {
10086     PCODECHAL_ENCODE_BUFFER tileRecordBuffer    = &m_tileRecordBuffer[m_virtualEngineBbIndex];
10087     bool                    useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
10088 
10089     MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
10090     indObjBaseAddrParams.Mode                        = CODECHAL_ENCODE_MODE_HEVC;
10091     indObjBaseAddrParams.presMvObjectBuffer          = IsPanicModePass() ? &m_skipFrameInfo.m_resMbCodeSkipFrameSurface : &m_resMbCodeSurface;
10092     indObjBaseAddrParams.dwMvObjectOffset            = m_mvOffset;
10093     indObjBaseAddrParams.dwMvObjectSize              = m_mbCodeSize - m_mvOffset;
10094     indObjBaseAddrParams.presPakBaseObjectBuffer     = &m_resBitstreamBuffer;
10095     indObjBaseAddrParams.dwPakBaseObjectSize         = m_bitstreamUpperBound;
10096     indObjBaseAddrParams.presPakTileSizeStasBuffer   = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
10097     indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
10098     indObjBaseAddrParams.dwPakTileSizeRecordOffset   = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
10099 }
10100 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)10101 MOS_STATUS CodechalEncHevcStateG12::UpdateCmdBufAttribute(
10102     PMOS_COMMAND_BUFFER cmdBuffer,
10103     bool                renderEngineInUse)
10104 {
10105     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10106 
10107     // should not be there. Will remove it in the next change
10108     CODECHAL_ENCODE_FUNCTION_ENTER;
10109     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
10110     {
10111         PMOS_CMD_BUF_ATTRI_VE attriExt =
10112             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
10113 
10114         memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
10115         attriExt->bUseVirtualEngineHint =
10116             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
10117     }
10118 
10119     return eStatus;
10120 }
10121 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)10122 MOS_STATUS CodechalEncHevcStateG12::SetAndPopulateVEHintParams(
10123     PMOS_COMMAND_BUFFER cmdBuffer)
10124 {
10125     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10126 
10127     CODECHAL_ENCODE_FUNCTION_ENTER;
10128 
10129     if (!MOS_VE_SUPPORTED(m_osInterface))
10130     {
10131         return eStatus;
10132     }
10133 
10134     CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
10135     MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
10136 
10137     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
10138     {
10139         scalSetParms.bNeedSyncWithPrevious = true;
10140     }
10141 
10142     if (m_numPipe >= 2)
10143     {
10144         int32_t currentPass = GetCurrentPass();
10145         if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
10146         {
10147             eStatus = MOS_STATUS_INVALID_PARAMETER;
10148             return eStatus;
10149         }
10150 
10151         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
10152         for (auto i = 0; i < m_numPipe; i++)
10153         {
10154             scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
10155         }
10156     }
10157 
10158     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
10159     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
10160     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
10161 
10162     return eStatus;
10163 }
10164 
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)10165 MOS_STATUS CodechalEncHevcStateG12::AddMediaVfeCmd(
10166     PMOS_COMMAND_BUFFER   cmdBuffer,
10167     SendKernelCmdsParams *params)
10168 {
10169     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
10170 
10171     MHW_VFE_PARAMS_G12 vfeParams       = {};
10172     vfeParams.pKernelState             = params->pKernelState;
10173     vfeParams.eVfeSliceDisable         = MHW_VFE_SLICE_ALL;
10174     vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads;
10175     vfeParams.bFusedEuDispatch         = false;  // legacy mode
10176 
10177     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
10178 
10179     return MOS_STATUS_SUCCESS;
10180 }
10181 
10182 #if USE_CODECHAL_DEBUG_TOOL
DumpFrameStatsBuffer(CodechalDebugInterface * debugInterface)10183 MOS_STATUS CodechalEncHevcStateG12::DumpFrameStatsBuffer(CodechalDebugInterface *debugInterface)
10184 {
10185     CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface);
10186 
10187     PMOS_RESOURCE resBuffer = &m_resFrameStatStreamOutBuffer;
10188     uint32_t      offset    = 0;
10189     uint32_t      num_tiles = 1;
10190     //In scalable mode, HEVC PAK Frame Statistics gets dumped out for each tile
10191     if (m_numPipe > 1)
10192     {
10193         resBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
10194         offset    = m_hevcTileStatsOffset.uiHevcPakStatistics;
10195         num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
10196     }
10197     uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * num_tiles, CODECHAL_CACHELINE_SIZE);
10198 
10199     CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
10200         resBuffer,
10201         CodechalDbgAttr::attrFrameState,
10202         "FrameStatus",
10203         size,
10204         offset,
10205         CODECHAL_NUM_MEDIA_STATES));
10206 
10207     return MOS_STATUS_SUCCESS;
10208 }
10209 
DumpPakOutput()10210 MOS_STATUS CodechalEncHevcStateG12::DumpPakOutput()
10211 {
10212     std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
10213 
10214     CODECHAL_DEBUG_TOOL(
10215         int32_t currentPass = GetCurrentPass();
10216         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10217             &m_resPakcuLevelStreamoutData.sResource,
10218             CodechalDbgAttr::attrCUStreamout,
10219             currPassName.data(),
10220             m_resPakcuLevelStreamoutData.dwSize,
10221             0,
10222             CODECHAL_NUM_MEDIA_STATES));
10223         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10224             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
10225             CodechalDbgAttr::attrTileBasedStats,
10226             currPassName.data(),
10227             m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
10228             0,
10229             CODECHAL_NUM_MEDIA_STATES));
10230         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10231             &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite],
10232             CodechalDbgAttr::attrBrcPakStats,
10233             currPassName.data(),
10234             m_hevcBrcPakStatisticsSize,
10235             0,
10236             CODECHAL_NUM_MEDIA_STATES));
10237         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10238             &m_HucStitchCmdBatchBuffer.OsResource,
10239             CodechalDbgAttr::attr2ndLvlBatchMfx,
10240             currPassName.data(),
10241             m_hwInterface->m_HucStitchCmdBatchBufferSize,
10242             0,
10243             CODECHAL_NUM_MEDIA_STATES));
10244         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10245             &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass],
10246             CodechalDbgAttr::attrHuCStitchDataBuf,
10247             currPassName.data(),
10248             sizeof(HucCommandData),
10249             0,
10250             CODECHAL_NUM_MEDIA_STATES));
10251         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
10252             &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
10253             sizeof(HucPakStitchDmemEncG12),
10254             currentPass,
10255             hucRegionDumpPakIntegrate));)
10256 
10257     return MOS_STATUS_SUCCESS;
10258 }
10259 #endif
10260 
EncodeMeKernel()10261 MOS_STATUS CodechalEncHevcStateG12::EncodeMeKernel()
10262 {
10263     CODECHAL_ENCODE_FUNCTION_ENTER;
10264 
10265     if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled())
10266     {
10267         CodechalKernelHme::CurbeParam curbeParam;
10268         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbeParams(curbeParam));
10269 
10270         CodechalKernelHme::SurfaceParams surfaceParam;
10271         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeSurfaceParams(surfaceParam));
10272 
10273         m_hmeKernel->setnoMEKernelForPFrame(m_lowDelay);
10274 
10275         if (m_hmeKernel->Is16xMeEnabled())
10276         {
10277             if (m_hmeKernel->Is32xMeEnabled())
10278             {
10279                 surfaceParam.downScaledWidthInMb         = m_downscaledWidthInMb32x;
10280                 surfaceParam.downScaledHeightInMb        = m_downscaledFrameFieldHeightInMb32x;
10281                 surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
10282                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x));
10283             }
10284             surfaceParam.downScaledWidthInMb         = m_downscaledWidthInMb16x;
10285             surfaceParam.downScaledHeightInMb        = m_downscaledFrameFieldHeightInMb16x;
10286             surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
10287             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x));
10288         }
10289         surfaceParam.downScaledWidthInMb         = m_downscaledWidthInMb4x;
10290         surfaceParam.downScaledHeightInMb        = m_downscaledFrameFieldHeightInMb4x;
10291         surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset;
10292         surfaceParam.meBrcDistortionSurface      = m_brcBuffers.meBrcDistortionSurface;
10293 
10294         curbeParam.sumMVThreshold = m_sumMVThreshold;
10295 
10296         m_lastTaskInPhase = true;
10297 
10298         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x));
10299     }
10300 
10301     return MOS_STATUS_SUCCESS;
10302 }
10303 
ResizeBufferOffset()10304 void CodechalEncHevcStateG12::ResizeBufferOffset()
10305 {
10306     CODECHAL_ENCODE_FUNCTION_ENTER;
10307 
10308     uint32_t size = 0;
10309     uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
10310     MBENC_COMBINED_BUFFER2 fixedBuf;
10311 
10312     //Re-Calculate m_encBCombinedBuffer2 Size and Offsets
10313     m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
10314     m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
10315 
10316     size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize;
10317 
10318     m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
10319     m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
10320 }
10321