1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_hevc_g11.cpp
24 //! \brief    HEVC dual-pipe encoder for GEN11.
25 //!
26 
27 #include "codechal_encode_hevc_g11.h"
28 #include "codechal_encode_csc_ds_g11.h"
29 #include "codechal_encode_wp_g11.h"
30 #include "codechal_kernel_header_g11.h"
31 #include "codechal_kernel_hme_g11.h"
32 #ifndef _FULL_OPEN_SOURCE
33 #include "igcodeckrn_g11.h"
34 #endif
35 #include "codeckrnheader.h"
36 #include "mhw_vdbox_hcp_g11_X.h"
37 #include "mhw_vdbox_g11_X.h"
38 #include "mos_util_user_interface.h"
39 
40 //! \cond SKIP_DOXYGEN
// Scales the mode bits of (lcu, mode, slice) by lambda and the per-mode scale factor,
// truncates the product to uint32_t and maps it through Map44LutValue (second argument 0x8F).
#define CRECOST(lambda, mode, lcu, slice)                                   \
    (Map44LutValue(                                                         \
        (uint32_t)((lambda) * (m_modeBits[(lcu)][(mode)][(slice)]) *        \
                   (m_modeBitsScale[(mode)][(slice)])),                     \
        0x8F))

// Scales the mode bits of (lcu, mode, slice) by the per-mode scale factor and
// converts the float result with GetU62ModeBits.
#define RDEBITS62(mode, lcu, slice)                                         \
    (GetU62ModeBits(                                                        \
        (float)((m_modeBits[(lcu)][(mode)][(slice)]) *                      \
                (m_modeBitsScale[(mode)][(slice)]))))
43 //! \endcond
44 
AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER * cmdBuffer)45 MOS_STATUS CodechalEncHevcStateG11::AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER* cmdBuffer)
46 {
47     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
48 
49     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11 pipeModeSelectParams;
50     SetHcpPipeModeSelectParams(pipeModeSelectParams);
51     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
52 
53     return eStatus;
54 }
55 
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)56 void CodechalEncHevcStateG11::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
57 {
58     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11& pipeModeSelectParams =
59         static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11&>(vdboxPipeModeSelectParams);
60     pipeModeSelectParams = {};
61     CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
62 
63     pipeModeSelectParams.pakPiplnStrmoutEnabled = m_pakPiplStrmOutEnable;
64     pipeModeSelectParams.pakFrmLvlStrmoutEnable = (m_brcEnabled && m_numPipe > 1);
65 
66     if (m_numPipe > 1)
67     {
68         // Running in the multiple VDBOX mode
69         if (IsFirstPipe())
70         {
71             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
72         }
73         else if (IsLastPipe())
74         {
75             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
76         }
77         else
78         {
79             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
80         }
81         pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
82     }
83     else
84     {
85         pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
86         pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
87     }
88 }
89 
SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE & picStateParams)90 void CodechalEncHevcStateG11::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE& picStateParams)
91 {
92     CODECHAL_ENCODE_FUNCTION_ENTER;
93 
94     CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams);
95     picStateParams.sseEnabledInVmeEncode = m_sseEnabled;
96 
97 }
98 
UpdateYUY2SurfaceInfo(MOS_SURFACE & surface,bool is10Bit)99 MOS_STATUS CodechalEncHevcStateG11::UpdateYUY2SurfaceInfo(
100     MOS_SURFACE& surface,
101     bool         is10Bit)
102 {
103     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
104 
105     CODECHAL_ENCODE_FUNCTION_ENTER;
106 
107     if (surface.Format == Format_YUY2V)
108     {
109         // surface has been updated
110         return eStatus;
111     }
112 
113     if (surface.Format != Format_YUY2 &&
114         surface.Format != Format_Y210 &&
115         surface.Format != Format_Y216)
116     {
117         eStatus = MOS_STATUS_INVALID_PARAMETER;
118         return eStatus;
119     }
120 
121     if (surface.dwWidth < m_oriFrameWidth / 2 || surface.dwHeight < m_oriFrameHeight * 2)
122     {
123         eStatus = MOS_STATUS_INVALID_PARAMETER;
124         return eStatus;
125     }
126 
127     surface.Format = is10Bit ? Format_Y216V : Format_YUY2V;
128     surface.dwWidth = m_oriFrameWidth;
129     surface.dwHeight = m_oriFrameHeight;
130 
131     surface.YPlaneOffset.iSurfaceOffset = 0;
132     surface.YPlaneOffset.iXOffset = 0;
133     surface.YPlaneOffset.iYOffset = 0;
134 
135     surface.UPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
136     surface.UPlaneOffset.iXOffset = 0;
137     surface.UPlaneOffset.iYOffset = surface.dwHeight;
138 
139     surface.VPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
140     surface.VPlaneOffset.iXOffset = 0;
141     surface.VPlaneOffset.iYOffset = surface.dwHeight;
142 
143     return eStatus;
144 }
145 
InitializePicture(const EncoderParams & params)146 MOS_STATUS CodechalEncHevcStateG11::InitializePicture(const EncoderParams& params)
147 {
148     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
149 
150     CODECHAL_ENCODE_FUNCTION_ENTER;
151 
152     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::InitializePicture(params));
153 
154     if (m_resolutionChanged)
155     {
156         ResizeBufferOffset();
157     }
158 
159     m_sseEnabled = false;
160     // only 420 format support SSE output
161     // see TDR in scalability case, disable SSE for now before HW confirm the capability.
162     if (m_sseSupported &&
163         m_hevcSeqParams->chroma_format_idc == HCP_CHROMA_FORMAT_YUV420 &&
164         m_numPipe == 1)
165     {
166         m_sseEnabled = true;
167     }
168     // for HEVC VME, HUC based WP is not supported.
169     m_hevcPicParams->bEnableGPUWeightedPrediction = false;
170 
171     m_pakPiplStrmOutEnable = m_sseEnabled || (m_brcEnabled && m_numPipe > 1);
172 
173     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams, params.dwBitstreamSize));
174     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
175     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResourcesVariableSize());
176 
177     return eStatus;
178 }
179 
SetPictureStructs()180 MOS_STATUS CodechalEncHevcStateG11::SetPictureStructs()
181 {
182     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
183 
184     CODECHAL_ENCODE_FUNCTION_ENTER;
185 
186     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs());
187 
188     m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
189 
190     if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
191         (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
192     {
193         uint8_t currRefIdx = m_hevcPicParams->CurrReconstructedPic.FrameIdx;
194         UpdateYUY2SurfaceInfo(m_refList[currRefIdx]->sRefBuffer, m_is10BitHevc);
195 
196         if(m_pictureCodingType != I_TYPE)
197         {
198             for (uint32_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
199             {
200                 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
201                 {
202                     continue;
203                 }
204                 uint8_t picIdx = m_picIdx[i].ucPicIdx;
205                 CODECHAL_ENCODE_ASSERT(picIdx < 127);
206 
207                 UpdateYUY2SurfaceInfo((m_refList[picIdx]->sRefBuffer), m_is10BitHevc);
208             }
209         }
210     }
211 
212     return eStatus;
213 }
214 
SetKernelParams(EncOperation encOperation,MHW_KERNEL_PARAM * kernelParams,uint32_t idx)215 MOS_STATUS CodechalEncHevcStateG11::SetKernelParams(
216     EncOperation                    encOperation,
217     MHW_KERNEL_PARAM*               kernelParams,
218     uint32_t                        idx)
219 {
220     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
221 
222     kernelParams->iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
223     kernelParams->iIdCount = 1;
224 
225     uint32_t curbeAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
226     switch (encOperation)
227     {
228     case ENC_MBENC:
229     {
230         switch (idx)
231         {
232         case MBENC_LCU32_KRNIDX:
233             kernelParams->iBTCount     = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
234             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU32_BTI), (size_t)curbeAlignment);
235             kernelParams->iBlockWidth  = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
236             kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
237             break;
238 
239         case MBENC_LCU64_KRNIDX:
240             kernelParams->iBTCount     = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
241             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU64_BTI), (size_t)curbeAlignment);
242             kernelParams->iBlockWidth  = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
243             kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
244             break;
245 
246         default:
247             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
248             return MOS_STATUS_INVALID_PARAMETER;
249         }
250     }
251     break;
252 
253     case ENC_BRC:
254     {
255         switch (idx)
256         {
257         case CODECHAL_HEVC_BRC_INIT:
258         case CODECHAL_HEVC_BRC_RESET:
259             kernelParams->iBTCount = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
260             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRC_INITRESET_CURBE), (size_t)curbeAlignment);
261             kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
262             kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
263             break;
264 
265         case CODECHAL_HEVC_BRC_FRAME_UPDATE:
266             kernelParams->iBTCount = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
267             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
268             kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
269             kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
270             break;
271 
272         case CODECHAL_HEVC_BRC_LCU_UPDATE:
273             kernelParams->iBTCount = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
274             kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
275             kernelParams->iBlockWidth = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
276             kernelParams->iBlockHeight = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
277             break;
278 
279         default:
280             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
281             return MOS_STATUS_INVALID_PARAMETER;
282         }
283     }
284     break;
285 
286     default:
287         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
288         return MOS_STATUS_INVALID_PARAMETER;
289     }
290 
291     return eStatus;
292 }
293 
SetBindingTable(EncOperation encOperation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,uint32_t idx)294 MOS_STATUS CodechalEncHevcStateG11::SetBindingTable(
295     EncOperation                            encOperation,
296     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC  hevcEncBindingTable,
297     uint32_t                                idx)
298 {
299     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
300 
301     CODECHAL_ENCODE_CHK_NULL_RETURN(hevcEncBindingTable);
302 
303     MOS_ZeroMemory(hevcEncBindingTable, sizeof(*hevcEncBindingTable));
304 
305     switch (encOperation)
306     {
307     case ENC_MBENC:
308     {
309         switch (idx)
310         {
311         case MBENC_LCU32_KRNIDX:
312         case MBENC_LCU64_KRNIDX:
313             hevcEncBindingTable->dwNumBindingTableEntries = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
314             hevcEncBindingTable->dwBindingTableStartOffset = MBENC_B_FRAME_BEGIN;
315             break;
316 
317         default:
318             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
319             return MOS_STATUS_INVALID_PARAMETER;
320         }
321     }
322     break;
323 
324     case ENC_BRC:
325     {
326         switch (idx)
327         {
328         case CODECHAL_HEVC_BRC_INIT:
329             hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
330             hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
331             break;
332 
333         case CODECHAL_HEVC_BRC_RESET:
334             hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
335             hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
336             break;
337 
338         case CODECHAL_HEVC_BRC_FRAME_UPDATE:
339             hevcEncBindingTable->dwNumBindingTableEntries = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
340             hevcEncBindingTable->dwBindingTableStartOffset = BRC_UPDATE_BEGIN;
341             break;
342 
343         case CODECHAL_HEVC_BRC_LCU_UPDATE:
344             hevcEncBindingTable->dwNumBindingTableEntries = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
345             hevcEncBindingTable->dwBindingTableStartOffset = BRC_LCU_UPDATE_BEGIN;
346             break;
347 
348         default:
349             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
350             return MOS_STATUS_INVALID_PARAMETER;
351         }
352     }
353     break;
354 
355     default:
356         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
357         return MOS_STATUS_INVALID_PARAMETER;
358     }
359 
360     for (uint32_t i = 0; i < hevcEncBindingTable->dwNumBindingTableEntries; i++)
361     {
362         hevcEncBindingTable->dwBindingTableEntries[i] = i;
363     }
364 
365     return eStatus;
366 }
367 
AllocateEncResources()368 MOS_STATUS CodechalEncHevcStateG11::AllocateEncResources()
369 {
370     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
371 
372     CODECHAL_ENCODE_FUNCTION_ENTER;
373 
374     // Surfaces used by I & B Kernels
375     uint32_t   width = 0, height = 0;
376     uint32_t   size = 0;
377 
378     // Intermediate CU Record surface
379     if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu32.OsResource))
380     {
381         width  = m_widthAlignedLcu32;
382         height = m_heightAlignedLcu32 >> 1;
383 
384         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
385             &m_intermediateCuRecordSurfaceLcu32,
386             width,
387             height,
388             "Intermediate CU record surface",
389             MOS_TILE_Y));
390     }
391 
392     // Scratch surface for I-kernel
393     if (Mos_ResourceIsNull(&m_scratchSurface.OsResource))
394     {
395         width  = m_widthAlignedLcu32 >> 3;
396         height = m_heightAlignedLcu32 >> 5;
397 
398         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
399             &m_scratchSurface,
400             width,
401             height,
402             "Scratch surface for I and B Kernels"));
403     }
404 
405     // LCU Level Input Data
406     for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
407     {
408         if (Mos_ResourceIsNull(&m_lcuLevelInputDataSurface[i].OsResource))
409         {
410             width  = 16 * ((m_widthAlignedMaxLcu >> 6) << 1);
411             height = ((m_heightAlignedMaxLcu >> 6) << 1);
412 
413             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
414                 &m_lcuLevelInputDataSurface[i],
415                 width,
416                 height,
417                 "Lcu Level Data Input surface"));
418         }
419     }
420 
421     m_brcInputForEncKernelBuffer = nullptr;
422 
423     //Current Picture Y with Reconstructed boundary pixels
424     if (Mos_ResourceIsNull(&m_currPicWithReconBoundaryPix.OsResource))
425     {
426         width  = m_widthAlignedLcu32;
427         height = m_heightAlignedLcu32;
428 
429         if (m_isMaxLcu64)
430         {
431             width  = m_widthAlignedMaxLcu;
432             height = m_heightAlignedMaxLcu;
433         }
434 
435         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
436             &m_currPicWithReconBoundaryPix,
437             width,
438             height,
439             "Current Picture Y with Reconstructed Boundary Pixels surface"));
440     }
441 
442     //Debug surface
443     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
444     {
445         if (Mos_ResourceIsNull(&m_debugSurface[i].sResource))
446         {
447             size = m_debugSurfaceSize;
448 
449             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
450                 &m_debugSurface[i],
451                 size,
452                 "Kernel debug surface"));
453         }
454     }
455 
456     // Surfaces used by B Kernels
457     // Enc constant table for B LCU32
458     if (Mos_ResourceIsNull(&m_encConstantTableForB.sResource))
459     {
460         size = m_encConstantDataLutSize;
461 
462         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
463             &m_encConstantTableForB,
464             size,
465             "Enc Constant Table surface For LCU32/LCU64"));
466     }
467 
468     if (m_hmeSupported)
469     {
470         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
471 
472         // BRC Distortion surface
473         if (Mos_ResourceIsNull(&m_brcBuffers.sMeBrcDistortionBuffer.OsResource))
474         {
475             width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
476             height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
477 
478             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
479                 &m_brcBuffers.sMeBrcDistortionBuffer,
480                 width,
481                 height,
482                 "Brc Distortion surface Buffer"));
483         }
484         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
485     }
486 
487     for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
488     {
489         if (Mos_ResourceIsNull(&m_encBCombinedBuffer1[i].sResource))
490         {
491             size = sizeof(MBENC_COMBINED_BUFFER1);
492 
493             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
494                 &m_encBCombinedBuffer1[i],
495                 size,
496                 "Enc B combined buffer1"));
497             // no intialization needed here
498             // driver will write the curbe into this surface in the SetCurbeMbEncKernel
499 
500         }
501     }
502 
503     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
504     {
505         if (Mos_ResourceIsNull(&m_encBCombinedBuffer2[i].sResource))
506         {
507             uint32_t                numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
508             MBENC_COMBINED_BUFFER2  fixedBuf;
509 
510             m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
511             m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
512             m_encFrameLevelDistortionBufferSize = ENC_FRAME_LEVEL_DISTORTION_BUFFER;
513             m_encCtuLevelDistortionBufferSize   = MOS_ALIGN_CEIL(16 * numLcu64, CODECHAL_CACHELINE_SIZE);
514             size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize +
515                     m_encFrameLevelDistortionBufferSize + m_encCtuLevelDistortionBufferSize;
516             m_historyOutBufferOffset    = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
517             m_threadTaskBufferOffset    = m_historyOutBufferOffset + m_historyOutBufferSize;
518 
519             CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
520                 &m_encBCombinedBuffer2[i],
521                 size,
522                 "Enc B combined buffer2"));
523             // no intialization needed here
524             // DS kernel will initialize the multi-thread task buffer to 0 (part of m_encBCombinedBuffer2)
525         }
526     }
527 
528     return eStatus;
529 }
530 
FreeEncResources()531 MOS_STATUS CodechalEncHevcStateG11::FreeEncResources()
532 {
533     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
534 
535     CODECHAL_ENCODE_FUNCTION_ENTER;
536 
537     MOS_DeleteArray(m_mbEncKernelStates);
538     m_mbEncKernelStates = nullptr;
539     MOS_FreeMemory(m_mbEncKernelBindingTable);
540     m_mbEncKernelBindingTable = nullptr;
541 
542     MOS_DeleteArray(m_brcKernelStates);
543     m_brcKernelStates = nullptr;
544     MOS_FreeMemory(m_brcKernelBindingTable);
545     m_brcKernelBindingTable = nullptr;
546 
547     HmeParams hmeParams;
548     MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
549     hmeParams.presMvAndDistortionSumSurface = &m_mvAndDistortionSumSurface.sResource;
550     CODECHAL_ENCODE_CHK_STATUS_RETURN(DestroyMEResources(&hmeParams));
551 
552     // Surfaces used by I kernel
553     // Release Intermediate CU Record surface
554     m_osInterface->pfnFreeResource(
555         m_osInterface,
556         &m_intermediateCuRecordSurfaceLcu32.OsResource);
557 
558     // Release Scratch surface for I-kernel
559     m_osInterface->pfnFreeResource(
560         m_osInterface,
561         &m_scratchSurface.OsResource);
562 
563     // Release LCU Level Input Data
564     for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
565     {
566         m_osInterface->pfnFreeResource(
567             m_osInterface,
568             &m_lcuLevelInputDataSurface[i].OsResource);
569     }
570 
571     // Release Current Picture Y with Reconstructed boundary pixels surface
572     m_osInterface->pfnFreeResource(
573         m_osInterface,
574         &m_currPicWithReconBoundaryPix.OsResource);
575 
576     // Release Debug surface
577     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
578     {
579         m_osInterface->pfnFreeResource(
580             m_osInterface,
581             &m_debugSurface[i].sResource);
582     }
583 
584    // Surfaces used by B Kernels
585    // Enc constant table for B LCU32
586    m_osInterface->pfnFreeResource(
587        m_osInterface,
588        &m_encConstantTableForB.sResource);
589 
590     CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources());
591 
592     for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
593     {
594         m_osInterface->pfnFreeResource(
595             m_osInterface,
596             &m_encBCombinedBuffer1[i].sResource);
597     }
598 
599     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
600     {
601         m_osInterface->pfnFreeResource(
602             m_osInterface,
603             &m_encBCombinedBuffer2[i].sResource);
604     }
605 
606     if (m_swScoreboard)
607     {
608         MOS_FreeMemory(m_swScoreboard);
609         m_swScoreboard = nullptr;
610     }
611 
612     return eStatus;
613 }
614 
AllocateMeResources()615 MOS_STATUS CodechalEncHevcStateG11::AllocateMeResources()
616 {
617     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
618 
619     CODECHAL_ENCODE_FUNCTION_ENTER;
620 
621     // Mv and Distortion Summation surface
622     if (Mos_ResourceIsNull(&m_mvAndDistortionSumSurface.sResource))
623     {
624         uint32_t size = m_mvdistSummationSurfSize;
625 
626         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
627             &m_mvAndDistortionSumSurface,
628             size,
629             "Mv and Distortion Summation surface"));
630 
631         // Initialize the surface to zero for now till HME is updated to output the data into this surface
632         MOS_LOCK_PARAMS lockFlags;
633         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
634         lockFlags.WriteOnly = 1;
635         uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
636             m_osInterface,
637             &m_mvAndDistortionSumSurface.sResource,
638             &lockFlags);
639         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
640 
641         MOS_ZeroMemory(data, size);
642 
643         m_osInterface->pfnUnlockResource(
644             m_osInterface,
645             &m_mvAndDistortionSumSurface.sResource);
646     }
647 
648     return eStatus;
649 }
650 
FreeMeResources()651 MOS_STATUS CodechalEncHevcStateG11::FreeMeResources()
652 {
653     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
654 
655     CODECHAL_ENCODE_FUNCTION_ENTER;
656 
657     m_osInterface->pfnFreeResource(
658         m_osInterface,
659         &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);
660 
661     return eStatus;
662 }
663 
AllocatePakResources()664 MOS_STATUS CodechalEncHevcStateG11::AllocatePakResources()
665 {
666     CODECHAL_ENCODE_FUNCTION_ENTER;
667 
668     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
669 
670     uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
671     uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
672     m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
673 
674     const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE);        //assume smallest LCU to get max width
675     const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE);      //assume smallest LCU to get max height
676 
677     MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
678     MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
679     hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
680     hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
681     // We should move the buffer allocation to picture level if the size is dependent on LCU size
682     hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
683     hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
684     hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
685 
686     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
687     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
688     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
689     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
690     allocParamsForBufferLinear.Format = Format_Buffer;
691 
692     // Deblocking Filter Row Store Scratch data surface
693     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
694         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
695         &hcpBufSizeParam);
696 
697     if (eStatus != MOS_STATUS_SUCCESS)
698     {
699         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
700         return eStatus;
701     }
702 
703     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
704     allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
705 
706     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
707         m_osInterface,
708         &allocParamsForBufferLinear,
709         &m_resDeblockingFilterRowStoreScratchBuffer);
710 
711     if (eStatus != MOS_STATUS_SUCCESS)
712     {
713         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
714         return eStatus;
715     }
716 
717     // Deblocking Filter Tile Row Store Scratch data surface
718     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
719         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
720         &hcpBufSizeParam);
721 
722     if (eStatus != MOS_STATUS_SUCCESS)
723     {
724         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
725         return eStatus;
726     }
727 
728     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
729     allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
730 
731     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
732         m_osInterface,
733         &allocParamsForBufferLinear,
734         &m_resDeblockingFilterTileRowStoreScratchBuffer);
735 
736     if (eStatus != MOS_STATUS_SUCCESS)
737     {
738         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
739         return eStatus;
740     }
741 
742     // Deblocking Filter Column Row Store Scratch data surface
743     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
744         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
745         &hcpBufSizeParam);
746 
747     if (eStatus != MOS_STATUS_SUCCESS)
748     {
749         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
750         return eStatus;
751     }
752 
753     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
754     allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
755 
756     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
757         m_osInterface,
758         &allocParamsForBufferLinear,
759         &m_resDeblockingFilterColumnRowStoreScratchBuffer);
760 
761     if (eStatus != MOS_STATUS_SUCCESS)
762     {
763         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
764         return eStatus;
765     }
766 
767     // Metadata Line buffer
768     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
769         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
770         &hcpBufSizeParam);
771 
772     if (eStatus != MOS_STATUS_SUCCESS)
773     {
774         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
775         return eStatus;
776     }
777 
778     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
779     allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
780 
781     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
782         m_osInterface,
783         &allocParamsForBufferLinear,
784         &m_resMetadataLineBuffer);
785 
786     if (eStatus != MOS_STATUS_SUCCESS)
787     {
788         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
789         return eStatus;
790     }
791 
792     // Metadata Tile Line buffer
793     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
794         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
795         &hcpBufSizeParam);
796 
797     if (eStatus != MOS_STATUS_SUCCESS)
798     {
799         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
800         return eStatus;
801     }
802 
803     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
804     allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
805 
806     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
807         m_osInterface,
808         &allocParamsForBufferLinear,
809         &m_resMetadataTileLineBuffer);
810 
811     if (eStatus != MOS_STATUS_SUCCESS)
812     {
813         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
814         return eStatus;
815     }
816 
817     // Metadata Tile Column buffer
818     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
819         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
820         &hcpBufSizeParam);
821 
822     if (eStatus != MOS_STATUS_SUCCESS)
823     {
824         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
825         return eStatus;
826     }
827 
828     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
829     allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
830 
831     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
832         m_osInterface,
833         &allocParamsForBufferLinear,
834         &m_resMetadataTileColumnBuffer);
835 
836     if (eStatus != MOS_STATUS_SUCCESS)
837     {
838         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
839         return eStatus;
840     }
841 
842     // SAO Line buffer
843     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
844         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
845         &hcpBufSizeParam);
846 
847     if (eStatus != MOS_STATUS_SUCCESS)
848     {
849         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
850         return eStatus;
851     }
852 
853     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
854     allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
855 
856     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
857         m_osInterface,
858         &allocParamsForBufferLinear,
859         &m_resSaoLineBuffer);
860 
861     if (eStatus != MOS_STATUS_SUCCESS)
862     {
863         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
864         return eStatus;
865     }
866 
867     // SAO Tile Line buffer
868     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
869         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
870         &hcpBufSizeParam);
871 
872     if (eStatus != MOS_STATUS_SUCCESS)
873     {
874         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
875         return eStatus;
876     }
877 
878     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
879     allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
880 
881     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
882         m_osInterface,
883         &allocParamsForBufferLinear,
884         &m_resSaoTileLineBuffer);
885 
886     if (eStatus != MOS_STATUS_SUCCESS)
887     {
888         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
889         return eStatus;
890     }
891 
892     // SAO Tile Column buffer
893     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
894         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
895         &hcpBufSizeParam);
896 
897     if (eStatus != MOS_STATUS_SUCCESS)
898     {
899         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
900         return eStatus;
901     }
902 
903     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
904     allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
905 
906     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
907         m_osInterface,
908         &allocParamsForBufferLinear,
909         &m_resSaoTileColumnBuffer);
910 
911     if (eStatus != MOS_STATUS_SUCCESS)
912     {
913         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
914         return eStatus;
915     }
916 
917     // Lcu ILDB StreamOut buffer
918     // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
919     allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
920     allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
921 
922     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
923         m_osInterface,
924         &allocParamsForBufferLinear,
925         &m_resLcuIldbStreamOutBuffer);
926 
927     if (eStatus != MOS_STATUS_SUCCESS)
928     {
929         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
930         return eStatus;
931     }
932 
933     // Lcu Base Address buffer
934     // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
935     // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
936     // Align to page for HUC requirement
937     uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
938     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
939     allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
940 
941     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
942         m_osInterface,
943         &allocParamsForBufferLinear,
944         &m_resLcuBaseAddressBuffer);
945 
946     if (eStatus != MOS_STATUS_SUCCESS)
947     {
948         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
949         return eStatus;
950     }
951     // SAO StreamOut buffer
952     // size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * 16
953     uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
954     //extra added size to cover tile enabled case, per tile width aligned to 4.  20: max tile column No.
955     size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
956     allocParamsForBufferLinear.dwBytes = size;
957     allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
958 
959     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
960         m_osInterface,
961         &allocParamsForBufferLinear,
962         &m_resSaoStreamOutBuffer);
963 
964     if (eStatus != MOS_STATUS_SUCCESS)
965     {
966         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
967         return eStatus;
968     }
969 
970     uint32_t   maxTileNumber = (MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE) *
971         (MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE);
972 
973     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
974     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
975     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
976     allocParamsForBufferLinear.Format = Format_Buffer;
977 
978     // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP pipe buffer address command
979     allocParamsForBufferLinear.dwBytes = m_sizeOfHcpPakFrameStats * maxTileNumber;  //Each tile has 8 cache size bytes of data
980     allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
981 
982     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
983         m_osInterface,
984         &allocParamsForBufferLinear,
985         &m_resFrameStatStreamOutBuffer));
986 
987     // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
    // Each CU takes 16 bytes. However, each tile needs to be aligned to the cache line
989     uint32_t frameWidthInCus   = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
990     uint32_t frameHeightInCus  = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
991     size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
992     allocParamsForBufferLinear.dwBytes = size;
993     allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
994 
995     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
996         m_osInterface,
997         &allocParamsForBufferLinear,
998         &m_resPakcuLevelStreamoutData.sResource));
999     m_resPakcuLevelStreamoutData.dwSize = size;
1000     CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
1001 
1002     // Allocate SSE Source Pixel Row Store Buffer. Implementation for each tile column is shown as below:
1003     //   tileWidthInLCU = ((tileWidthInLCU+3) * BYTES_PER_CACHE_LINE)*(4+4) ; tileWidthInLCU <<= 1; // double the size as RTL treats it as 10 bit data
1004     // Here, we consider each LCU column is one tile column.
1005     m_sizeOfSseSrcPixelRowStoreBufferPerLcu = (CODECHAL_CACHELINE_SIZE * (4 + 4)) << 1;  //size per LCU plus 10-bit
1006     size = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (picWidthInMinLCU + 3);  // already aligned to cacheline size
1007     allocParamsForBufferLinear.dwBytes  = size;
1008     allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
1009 
1010     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1011         m_osInterface,
1012         &allocParamsForBufferLinear,
1013         &m_resSseSrcPixelRowStoreBuffer));
1014 
1015     //HCP scalability Sync buffer
1016     size = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
1017     allocParamsForBufferLinear.dwBytes  = size;
1018     allocParamsForBufferLinear.pBufName = "GEN11 Hcp scalability Sync buffer ";
1019 
1020     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1021         m_osInterface,
1022         &allocParamsForBufferLinear,
1023         &m_resHcpScalabilitySyncBuffer.sResource));
1024     m_resHcpScalabilitySyncBuffer.dwSize = size;
1025 
1026     // create the tile coding state parameters
1027     m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)MOS_AllocAndZeroMemory
1028     (sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)* maxTileNumber);
1029 
1030     if(m_enableHWSemaphore)
1031     {
1032         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1033         allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
1034 
1035         MOS_LOCK_PARAMS lockFlagsWriteOnly;
1036         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1037         lockFlagsWriteOnly.WriteOnly = 1;
1038 
1039         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1040         {
1041             eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1042                 m_osInterface,
1043                 &allocParamsForBufferLinear,
1044                 &m_resBrcSemaphoreMem[i].sResource);
1045             m_resBrcSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
1046             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create BRC HW Semaphore Memory.");
1047 
1048             uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1049                 m_osInterface,
1050                 &m_resBrcSemaphoreMem[i].sResource,
1051                 &lockFlagsWriteOnly);
1052 
1053             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1054 
1055             *data = 1;
1056 
1057             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1058                 m_osInterface,
1059                 &m_resBrcSemaphoreMem[i].sResource));
1060         }
1061 
1062         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1063             m_osInterface,
1064             &allocParamsForBufferLinear,
1065             &m_resPipeStartSemaMem);
1066         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe start sync HW semaphore.");
1067 
1068         uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1069             m_osInterface,
1070             &m_resPipeStartSemaMem,
1071             &lockFlagsWriteOnly);
1072 
1073         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1074         *data = 0;
1075         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1076             m_osInterface,
1077             &m_resPipeStartSemaMem));
1078 
1079         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1080             m_osInterface,
1081             &allocParamsForBufferLinear,
1082             &m_resPipeCompleteSemaMem);
1083         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe completion sync HW semaphore.");
1084 
1085         data = (uint32_t *)m_osInterface->pfnLockResource(
1086             m_osInterface,
1087             &m_resPipeCompleteSemaMem,
1088             &lockFlagsWriteOnly);
1089 
1090         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1091         *data = 0;
1092         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1093             m_osInterface,
1094             &m_resPipeCompleteSemaMem));
1095 
1096     }
1097 
1098     if (m_hucPakStitchEnabled)
1099     {
1100         if (Mos_ResourceIsNull(&m_resHucStatus2Buffer))
1101         {
1102             // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1103             allocParamsForBufferLinear.dwBytes = sizeof(uint64_t);
1104             allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer";
1105 
1106             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1107                 m_osInterface->pfnAllocateResource(
1108                     m_osInterface,
1109                     &allocParamsForBufferLinear,
1110                     &m_resHucStatus2Buffer),
1111                 "%s: Failed to allocate HUC STATUS 2 Buffer\n", __FUNCTION__);
1112         }
1113 
1114         uint8_t* data;
1115 
1116         // Pak stitch DMEM
1117         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG11), CODECHAL_CACHELINE_SIZE);
1118         allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
1119         auto numOfPasses = CODECHAL_DP_MAX_NUM_BRC_PASSES;
1120         for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
1121         {
1122             for (auto i = 0; i < numOfPasses; i++)
1123             {
1124                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1125                     m_osInterface->pfnAllocateResource(
1126                         m_osInterface,
1127                         &allocParamsForBufferLinear,
1128                         &m_resHucPakStitchDmemBuffer[j][i]),
1129                     "Failed to allocate PAK Stitch Dmem Buffer.");
1130 
1131             }
1132         }
1133         // BRC Data Buffer
1134         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1135         allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
1136 
1137         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1138             m_osInterface->pfnAllocateResource(
1139                 m_osInterface,
1140                 &allocParamsForBufferLinear,
1141                 &m_resBrcDataBuffer),
1142             "Failed to allocate BRC Data Buffer Buffer.");
1143 
1144         MOS_LOCK_PARAMS lockFlags;
1145         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1146         lockFlags.WriteOnly = 1;
1147 
1148         data = (uint8_t*)m_osInterface->pfnLockResource(
1149             m_osInterface,
1150             &m_resBrcDataBuffer,
1151             &lockFlags);
1152 
1153         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1154 
1155         MOS_ZeroMemory(
1156             data,
1157             allocParamsForBufferLinear.dwBytes);
1158 
1159         m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
1160 
1161     for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1162     {
1163         for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1164         {
1165             // HuC stitching Data buffer
1166             allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
1167             allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
1168             CODECHAL_ENCODE_CHK_STATUS_RETURN(
1169                 m_osInterface->pfnAllocateResource(
1170                     m_osInterface,
1171                     &allocParamsForBufferLinear,
1172                     &m_resHucStitchDataBuffer[i][j]));
1173 
1174             MOS_LOCK_PARAMS lockFlagsWriteOnly;
1175             MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1176             lockFlagsWriteOnly.WriteOnly = 1;
1177 
1178             uint8_t* pData = (uint8_t*)m_osInterface->pfnLockResource(
1179                 m_osInterface,
1180                 &m_resHucStitchDataBuffer[i][j],
1181                 &lockFlagsWriteOnly);
1182             CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
1183             MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
1184             m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1185         }
1186     }
1187 
1188     //Second level BB for huc stitching cmd
1189     MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
1190     m_HucStitchCmdBatchBuffer.bSecondLevel = true;
1191     CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1192         m_osInterface,
1193         &m_HucStitchCmdBatchBuffer,
1194         nullptr,
1195         m_hwInterface->m_HucStitchCmdBatchBufferSize));
1196     }
1197 
1198     if (m_numDelay)
1199     {
1200         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1201         allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1202 
1203         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1204             m_osInterface,
1205             &allocParamsForBufferLinear,
1206             &m_resDelayMinus), "Failed to allocate delay minus memory.");
1207 
1208         uint8_t* data;
1209         MOS_LOCK_PARAMS lockFlags;
1210         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1211         lockFlags.WriteOnly = 1;
1212         data = (uint8_t*)m_osInterface->pfnLockResource(
1213             m_osInterface,
1214             &m_resDelayMinus,
1215             &lockFlags);
1216 
1217         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1218 
1219         MOS_ZeroMemory(data, sizeof(uint32_t));
1220 
1221         m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1222     }
1223 
1224 
1225     return eStatus;
1226 }
1227 
FreePakResources()1228 MOS_STATUS CodechalEncHevcStateG11::FreePakResources()
1229 {
1230     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
1231 
1232     CODECHAL_ENCODE_FUNCTION_ENTER;
1233 
1234     // Release Frame Statistics Streamout Data Destination Buffer
1235     m_osInterface->pfnFreeResource(
1236         m_osInterface,
1237         &m_resFrameStatStreamOutBuffer);
1238 
1239     // PAK CU Level Stream out buffer
1240     m_osInterface->pfnFreeResource(
1241         m_osInterface,
1242         &m_resPakcuLevelStreamoutData.sResource);
1243 
1244     // Release SSE Source Pixel Row Store Buffer
1245     m_osInterface->pfnFreeResource(
1246         m_osInterface,
1247         &m_resSseSrcPixelRowStoreBuffer);
1248 
1249     // Release Hcp scalability Sync buffer
1250     m_osInterface->pfnFreeResource(
1251         m_osInterface,
1252         &m_resHcpScalabilitySyncBuffer.sResource);
1253 
1254     m_osInterface->pfnFreeResource(
1255         m_osInterface,
1256         &m_resPakcuLevelStreamoutData.sResource);
1257 
1258     m_osInterface->pfnFreeResource(
1259         m_osInterface,
1260         &m_resPakSliceLevelStreamoutData.sResource);
1261 
1262     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1263     {
1264         m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1265     }
1266     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1267     {
1268         m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1269     }
1270     m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1271 
1272     MOS_FreeMemory(m_tileParams);
1273 
1274     if (m_useVirtualEngine)
1275     {
1276         for(auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1277         {
1278             for(auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1279             {
1280                 for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1281                 {
1282                     PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1283                     if (cmdBuffer->pCmdBase)
1284                     {
1285                         m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1286                     }
1287                     m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1288                 }
1289             }
1290         }
1291     }
1292 
1293     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1294     {
1295         auto sync = &m_refSync[i];
1296 
1297         if (!Mos_ResourceIsNull(&sync->resSyncObject))
1298         {
1299             // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1300             if (sync->uiSemaphoreObjCount || sync->bInUsed)
1301             {
1302                 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1303                 syncParams.GpuContext = m_renderContext;
1304                 syncParams.presSyncResource = &sync->resSyncObject;
1305                 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1306                 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1307             }
1308         }
1309         m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1310     }
1311 
1312     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1313     {
1314         m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcSemaphoreMem[i].sResource);
1315     }
1316     m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem);
1317     m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeCompleteSemaMem);
1318 
1319     if (m_hucPakStitchEnabled)
1320     {
1321         m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer);
1322         m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1323 
1324         for (int i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1325         {
1326             for (int j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1327             {
1328                 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[i][j]);
1329                 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1330             }
1331         }
1332         Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
1333     }
1334 
1335     if (m_numDelay)
1336     {
1337         m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
1338     }
1339 
1340     return CodechalEncHevcState::FreePakResources();
1341 }
1342 
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)1343 MOS_STATUS CodechalEncHevcStateG11::GetKernelHeaderAndSize(
1344     void                           *binary,
1345     EncOperation                   operation,
1346     uint32_t                       krnStateIdx,
1347     void                           *krnHeader,
1348     uint32_t                       *krnSize)
1349 {
1350     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1351 
1352     CODECHAL_ENCODE_FUNCTION_ENTER;
1353 
1354     CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
1355     CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
1356     CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
1357 
1358     PCODECHAL_HEVC_KERNEL_HEADER kernelHeaderTable = (PCODECHAL_HEVC_KERNEL_HEADER)binary;
1359 
1360     PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
1361     switch (operation)
1362     {
1363     case ENC_MBENC:
1364     {
1365         switch (krnStateIdx)
1366         {
1367         case MBENC_LCU32_KRNIDX:
1368             currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU32;
1369             break;
1370 
1371         case MBENC_LCU64_KRNIDX:
1372             currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU64;
1373             break;
1374 
1375         default:
1376             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
1377             return MOS_STATUS_INVALID_PARAMETER;
1378         }
1379     }
1380     break;
1381 
1382     case ENC_BRC:
1383     {
1384         switch (krnStateIdx)
1385         {
1386         case CODECHAL_HEVC_BRC_INIT:
1387             currKrnHeader = &kernelHeaderTable->HEVC_brc_init;
1388             break;
1389 
1390         case CODECHAL_HEVC_BRC_RESET:
1391             currKrnHeader = &kernelHeaderTable->HEVC_brc_reset;
1392             break;
1393 
1394         case CODECHAL_HEVC_BRC_FRAME_UPDATE:
1395             currKrnHeader = &kernelHeaderTable->HEVC_brc_update;
1396             break;
1397 
1398         case CODECHAL_HEVC_BRC_LCU_UPDATE:
1399             currKrnHeader = &kernelHeaderTable->HEVC_brc_lcuqp;
1400             break;
1401 
1402         default:
1403             CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
1404             return MOS_STATUS_INVALID_PARAMETER;
1405         }
1406     }
1407     break;
1408 
1409     default:
1410         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
1411         return MOS_STATUS_INVALID_PARAMETER;
1412     }
1413 
1414     *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
1415 
1416     PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1);
1417     PCODECHAL_KERNEL_HEADER invalidEntry = &(kernelHeaderTable->HEVC_brc_lcuqp) + 1;
1418     uint32_t nextKrnOffset = *krnSize;
1419     if (nextKrnHeader < invalidEntry)
1420     {
1421         nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
1422     }
1423     *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1424 
1425     return eStatus;
1426 }
1427 
InitKernelStateMbEnc()1428 MOS_STATUS CodechalEncHevcStateG11::InitKernelStateMbEnc()
1429 {
1430     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1431 
1432     CODECHAL_ENCODE_FUNCTION_ENTER;
1433 
1434     PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1435     m_numMbEncEncKrnStates                       = MBENC_NUM_KRN;
1436 
1437     m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
1438     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
1439 
1440     m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1441         sizeof(GenericBindingTable) *
1442         m_numMbEncEncKrnStates);
1443     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
1444 
1445     PMHW_KERNEL_STATE kernelStatePtr = m_mbEncKernelStates;
1446 
1447     for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++)
1448     {
1449         auto kernelSize = m_combinedKernelSize;
1450         CODECHAL_KERNEL_HEADER currKrnHeader;
1451         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1452             m_kernelBinary,
1453             ENC_MBENC,
1454             krnStateIdx,
1455             &currKrnHeader,
1456             (uint32_t*)&kernelSize));
1457 
1458         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1459             ENC_MBENC,
1460             &kernelStatePtr->KernelParams,
1461             krnStateIdx));
1462 
1463         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1464             ENC_MBENC,
1465             &m_mbEncKernelBindingTable[krnStateIdx],
1466             krnStateIdx));
1467 
1468         kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1469         kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1470         kernelStatePtr->KernelParams.iSize = kernelSize;
1471 
1472         CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1473             stateHeapInterface,
1474             kernelStatePtr->KernelParams.iBTCount,
1475             &kernelStatePtr->dwSshSize,
1476             &kernelStatePtr->dwBindingTableSize));
1477 
1478         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1479 
1480         kernelStatePtr++;
1481     }
1482 
1483     return eStatus;
1484 }
1485 
InitKernelStateBrc()1486 MOS_STATUS CodechalEncHevcStateG11::InitKernelStateBrc()
1487 {
1488     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1489 
1490     CODECHAL_ENCODE_FUNCTION_ENTER;
1491 
1492     PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1493     m_numBrcKrnStates                            = CODECHAL_HEVC_BRC_NUM;
1494 
1495     m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
1496     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
1497 
1498     m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1499         sizeof(GenericBindingTable) *
1500         m_numBrcKrnStates);
1501 
1502     PMHW_KERNEL_STATE kernelStatePtr = m_brcKernelStates;
1503 
1504     kernelStatePtr++; // Skipping BRC_COARSE_INTRA as it not in Gen11
1505 
1506     // KrnStateIdx initialization starts at 1 as Gen11 does not support BRC_COARSE_INTRA kernel in BRC. It is part of the Combined Common Kernel
1507     for (uint32_t krnStateIdx = 1; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
1508     {
1509         auto kernelSize = m_combinedKernelSize;
1510         CODECHAL_KERNEL_HEADER  currKrnHeader;
1511 
1512         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1513             m_kernelBinary,
1514             ENC_BRC,
1515             krnStateIdx,
1516             &currKrnHeader,
1517             (uint32_t*)&kernelSize));
1518 
1519         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1520             ENC_BRC,
1521             &kernelStatePtr->KernelParams,
1522             krnStateIdx));
1523 
1524         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1525             ENC_BRC,
1526             &m_brcKernelBindingTable[krnStateIdx],
1527             krnStateIdx));
1528 
1529         kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1530         kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1531         kernelStatePtr->KernelParams.iSize = kernelSize;
1532 
1533         CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1534             stateHeapInterface,
1535             kernelStatePtr->KernelParams.iBTCount,
1536             &kernelStatePtr->dwSshSize,
1537             &kernelStatePtr->dwBindingTableSize));
1538 
1539         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1540 
1541         kernelStatePtr++;
1542     }
1543 
1544     return eStatus;
1545 }
1546 
GetMaxBtCount()1547 uint32_t CodechalEncHevcStateG11::GetMaxBtCount()
1548 {
1549 
1550     uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
1551 
1552     // BRC Init kernel
1553     uint32_t btCountPhase1 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount, btIdxAlignment);
1554 
1555     // SwScoreboard kernel
1556     uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_swScoreboardState->GetBTCount(), btIdxAlignment);
1557 
1558     // Csc+Ds+Conversion kernel
1559     btCountPhase2 += MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment);
1560 
1561     // Intra Distortion kernel
1562     if (m_intraDistKernel)
1563     {
1564         btCountPhase2 += MOS_ALIGN_CEIL(m_intraDistKernel->GetBTCount(), btIdxAlignment);
1565     }
1566     // HME 4x, 16x, 32x kernel
1567     if (m_hmeKernel)
1568     {
1569         btCountPhase2 += (MOS_ALIGN_CEIL(m_hmeKernel->GetBTCount(), btIdxAlignment)) * 3;
1570     }
1571 
1572     // Weighted prediction kernel
1573     btCountPhase2 += MOS_ALIGN_CEIL(m_wpState->GetBTCount(), btIdxAlignment);
1574 
1575     // LCU32 kernel, BRC Frame Update kernel, BRC LCU Update kernel
1576     uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1577                              MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1578                              MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU32_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1579 
1580     // LCU64 kernel, BRC Frame Update kernel, BRC LCU Update kernel
1581     uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1582                              MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1583                              MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU64_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1584 
1585     uint32_t maxBtCount   = MOS_MAX(btCountPhase1, btCountPhase2);
1586     maxBtCount = MOS_MAX(maxBtCount, btCountPhase3);
1587     maxBtCount = MOS_MAX(maxBtCount, btCountPhase4);
1588 
1589     return maxBtCount;
1590 }
1591 
CalcScaledDimensions()1592 MOS_STATUS CodechalEncHevcStateG11::CalcScaledDimensions()
1593 {
1594     return MOS_STATUS_SUCCESS;
1595 }
1596 
GetMaxRefFrames(uint8_t & maxNumRef0,uint8_t & maxNumRef1)1597 void CodechalEncHevcStateG11::GetMaxRefFrames(uint8_t& maxNumRef0, uint8_t& maxNumRef1)
1598 {
1599     maxNumRef0 = m_maxNumVmeL0Ref;
1600     maxNumRef1 = m_maxNumVmeL1Ref;
1601 
1602     return;
1603 }
1604 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1605 MOS_STATUS CodechalEncHevcStateG11::GetStatusReport(
1606     EncodeStatus         *encodeStatus,
1607     EncodeStatusReport   *encodeStatusReport)
1608 {
1609     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
1610 
1611     CODECHAL_ENCODE_FUNCTION_ENTER;
1612 
1613     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1614     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1615 
1616     if(encodeStatusReport->UsedVdBoxNumber <= 1)
1617     {
1618         m_syntaxElementOnlyBitCnt = encodeStatus->dwMFCBitstreamSyntaxElementOnlyBitCount;
1619         return CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport);
1620     }
1621 
1622     PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1623 
1624     MOS_LOCK_PARAMS lockFlags;
1625     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1626     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
1627     HCPPakHWTileSizeRecord_G11* tileStatusReport = (HCPPakHWTileSizeRecord_G11*)m_osInterface->pfnLockResource(
1628         m_osInterface,
1629         &tileSizeStatusReport->sResource,
1630         &lockFlags);
1631     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1632 
1633     encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1634     encodeStatusReport->PanicMode          = false;
1635     encodeStatusReport->AverageQp          = 0;
1636     encodeStatusReport->QpY                = 0;
1637     encodeStatusReport->SuggestedQpYDelta  = 0;
1638     encodeStatusReport->NumberPasses       = 1;
1639     encodeStatusReport->bitstreamSize      = 0;
1640     encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1641 
1642     uint32_t totalCU = 0;
1643     double sumQp = 0.0;
1644     for(uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1645     {
1646         if(tileStatusReport[i].Length == 0)
1647         {
1648             encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1649             return eStatus;
1650         }
1651 
1652         encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1653         totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1654         sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1655     }
1656     encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses + 1;
1657     CODECHAL_ENCODE_VERBOSEMESSAGE("Scalability Mode Exectued PAK Pass number: %d.\n", encodeStatusReport->NumberPasses);
1658 
1659     if (encodeStatusReport->bitstreamSize == 0 ||
1660         encodeStatusReport->bitstreamSize >m_bitstreamUpperBound)
1661     {
1662         encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1663         encodeStatusReport->bitstreamSize = 0;
1664         CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!");
1665         return MOS_STATUS_INVALID_FILE_SIZE;
1666     }
1667 
1668     if (m_sseEnabled)
1669     {
1670         CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
1671     }
1672 
1673     CODECHAL_ENCODE_CHK_COND_RETURN(totalCU == 0, "ERROR - totalCU cannot be zero.");
1674     encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1675         (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1676 
1677     if(m_enableTileStitchByHW)
1678     {
1679         return eStatus;
1680     }
1681 
1682     uint8_t *tempBsBuffer = nullptr,*bufPtr = nullptr;
1683     tempBsBuffer = bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
1684     CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
1685 
1686     CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
1687     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1688     lockFlags.ReadOnly = 1;
1689     uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
1690         m_osInterface,
1691         &currRefList.resBitstreamBuffer,
1692         &lockFlags);
1693     if (bitstream == nullptr)
1694     {
1695         MOS_SafeFreeMemory(tempBsBuffer);
1696         CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
1697     }
1698 
1699     for(uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1700     {
1701         uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1702         uint32_t len = tileStatusReport[i].Length;
1703 
1704         MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
1705         bufPtr += len;
1706     }
1707 
1708     MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
1709     MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
1710        m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
1711 
1712     if(tempBsBuffer)
1713     {
1714         MOS_FreeMemory(tempBsBuffer);
1715     }
1716 
1717     if(m_osInterface && bitstream)
1718     {
1719         m_osInterface->pfnUnlockResource(m_osInterface, &currRefList.resBitstreamBuffer);
1720     }
1721 
1722     if(m_osInterface && tileStatusReport)
1723     {
1724         // clean-up the tile status report buffer
1725         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
1726 
1727         m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
1728     }
1729 
1730     return eStatus;
1731 }
1732 
AllocateResourcesVariableSize()1733 MOS_STATUS CodechalEncHevcStateG11::AllocateResourcesVariableSize()
1734 {
1735     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
1736 
1737     CODECHAL_ENCODE_FUNCTION_ENTER;
1738 
1739     if (!m_hevcPicParams->tiles_enabled_flag)
1740     {
1741         return eStatus;
1742     }
1743 
1744     uint32_t bufSize = 0;
1745     if (m_pakPiplStrmOutEnable)
1746     {
1747         // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
1748         // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1749         uint32_t tileWidthInCus = 0;
1750         uint32_t tileHeightInCus = 0;
1751         uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
1752         uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
1753         for(uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
1754         {
1755             for(uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
1756             {
1757                 uint32_t idx = tileRow * numTileColumns + tileCol;
1758 
1759                 tileHeightInCus = m_tileParams[idx].TileHeightInMinCbMinus1 + 1;
1760                 tileWidthInCus  = m_tileParams[idx].TileWidthInMinCbMinus1 + 1;
1761                 bufSize += (tileWidthInCus * tileHeightInCus * 16);
1762                 bufSize = MOS_ALIGN_CEIL(bufSize, CODECHAL_CACHELINE_SIZE);
1763             }
1764         }
1765         if (Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ||
1766             (bufSize > m_resPakcuLevelStreamoutData.dwSize))
1767         {
1768             if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
1769             {
1770                 m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
1771             }
1772 
1773             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1774             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1775             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1776             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1777             allocParamsForBufferLinear.Format = Format_Buffer;
1778             allocParamsForBufferLinear.dwBytes  = bufSize;
1779             allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
1780 
1781             CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1782                 m_osInterface,
1783                 &allocParamsForBufferLinear,
1784                 &m_resPakcuLevelStreamoutData.sResource));
1785             m_resPakcuLevelStreamoutData.dwSize = bufSize;
1786             CODECHAL_ENCODE_VERBOSEMESSAGE("reallocate cu steam out buffer, size=0x%x.\n", bufSize);
1787         }
1788     }
1789 
1790     return eStatus;
1791 }
1792 
//!
//! \brief    Program the picture-level PAK commands for the current pass and pipe
//! \details  Commands are added to the command buffer in this order: the
//!           prolog (when single task phase is not supported or this is the
//!           first task in the phase), the multi-pipe start synchronization,
//!           the BRC conditional batch buffer end and image status control
//!           write-back for non-first BRC passes, the previous GPU status tag
//!           (first pipe, first pass, tag resource sync only), the status
//!           report start (first pipe only), the BRC semaphore reset, and then
//!           the HCP pipe mode select, surface state, pipe buffer address,
//!           indirect object base address, FQM/QM, picture state and optional
//!           RDOQ state commands. The command buffer is returned at the end.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, MOS_STATUS_INVALID_PARAMETER if
//!           the VDBOX index exceeds the maximum or the current pipe is
//!           negative, else the fail reason reported by a callee
//!
ExecutePictureLevel()1793 MOS_STATUS CodechalEncHevcStateG11::ExecutePictureLevel()
1794 {
1795     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
1796 
1797     CODECHAL_ENCODE_FUNCTION_ENTER;
1798 
    // Without single task phase support each pass is both the first and the
    // last task of its phase.
1799     m_firstTaskInPhase = m_singleTaskPhaseSupported? IsFirstPass(): true;
1800     m_lastTaskInPhase  = m_singleTaskPhaseSupported? IsLastPass(): true;
1801     PerfTagSetting perfTag;
1802     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
1803 
1804     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
1805 
1806     if (!m_singleTaskPhaseSupportedInPak)
1807     {
1808         // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
1809         m_firstTaskInPhase = true;
1810         m_lastTaskInPhase  = true;
1811     }
1812 
1813     if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())
1814     {
1815         CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1816         eStatus = MOS_STATUS_INVALID_PARAMETER;
1817         return eStatus;
1818     }
1819 
    // NOTE(review): the early returns below this point leave the function
    // without calling ReturnCommandBuffer - confirm that is acceptable for
    // the command buffer obtained here.
1820     MOS_COMMAND_BUFFER cmdBuffer;
1821     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
1822 
1823     if ((!m_singleTaskPhaseSupported) || m_firstTaskInPhase)
1824     {
1825         // Send command buffer header at the beginning (OS dependent)
1826         // frame tracking tag is only added in the last command buffer header
1827         bool bRequestFrameTracking = m_singleTaskPhaseSupported ?
1828             m_firstTaskInPhase :
1829             m_lastTaskInPhase;
1830 
1831         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, bRequestFrameTracking));
1832     }
1833 
1834     // clean-up per VDBOX semaphore memory
1835     int32_t currentPipe = GetCurrentPipe();
1836     if (currentPipe < 0)
1837     {
1838         eStatus = MOS_STATUS_INVALID_PARAMETER;
1839         return eStatus;
1840     }
1841 
    // Multi-pipe start synchronization: done on every pass when single task
    // phase is not supported, otherwise on the first pass only.
1842     if (m_numPipe >= 2 &&
1843         ((m_singleTaskPhaseSupported && IsFirstPass()) ||
1844             !m_singleTaskPhaseSupported))
1845     {
1846         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
1847 
1848         //HW Semaphore cmd to make sure all pipes start encode at the same time
1849         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
1850         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
1851             &m_resPipeStartSemaMem,
1852             &cmdBuffer,
1853             m_numPipe));
1854 
1855         // Program some placeholder cmds to resolve the hazard between BEs sync
        // NOTE(review): dataParams is not zeroed, unlike storeDataParams further
        // down; only the three fields assigned here are initialized - confirm
        // the structure has no other members.
1856         MHW_MI_STORE_DATA_PARAMS dataParams;
1857         dataParams.pOsResource = &m_resDelayMinus;
1858         dataParams.dwResourceOffset = 0;
1859         dataParams.dwValue = 0xDE1A;
        // The same store command is added m_numDelay times.
1860         for (uint32_t i = 0; i < m_numDelay; i++)
1861         {
1862             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
1863                 &cmdBuffer,
1864                 &dataParams));
1865         }
1866         //clean HW semaphore memory
1867         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
1868 
1869         //Start Watchdog Timer
1870         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
1871         //To help test media reset, this hw semaphore wait will never be reached.
1872         if (m_enableTestMediaReset)
1873         {
1874             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
1875                 &m_resPipeStartSemaMem,
1876                 &cmdBuffer,
1877                 m_numPipe + 2));
1878         }
1879     }
1880 
1881     if (m_brcEnabled && !IsFirstPass())  // Only the regular BRC passes have the conditional batch buffer end
1882     {
1883         // Ensure the previous PAK BRC pass is done, mainly for pipes other than pipe0.
1884         if (m_singleTaskPhaseSupported && m_numPipe >= 2 &&
1885             !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
1886         {
1887             CODECHAL_ENCODE_CHK_STATUS_RETURN(
1888                 SendHWWaitCommand(
1889                     &m_resBrcSemaphoreMem[currentPipe].sResource,
1890                     &cmdBuffer,
1891                     1));
1892         }
1893 
1894         // Insert conditional batch buffer end
1895         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
1896         MOS_ZeroMemory(
1897             &miConditionalBatchBufferEndParams,
1898             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
1899         uint32_t BaseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
1900                 sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource       ;
1901 
        // The condition is read from the HuC status mask when HuC PAK stitch is
        // enabled with two or more pipes, otherwise from the image status mask.
1902         if (m_hucPakStitchEnabled && m_numPipe >= 2)  //BRC scalability
1903         {
1904             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned
1905             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwHuCStatusRegOffset);
1906 
1907             miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1908             miConditionalBatchBufferEndParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwHuCStatusMaskOffset;
1909         }
1910         else
1911         {
1912             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned
1913             CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwImageStatusCtrlOffset);
1914 
1915             miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1916             miConditionalBatchBufferEndParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusMaskOffset;
1917         }
1918 
1919         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
1920             &cmdBuffer,
1921             &miConditionalBatchBufferEndParams));
1922 
1923         auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
1924         CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
1925         MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
1926         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1927         if (m_hucPakStitchEnabled && m_numPipe >= 2)
1928         {
1929             // Write back the HCP image control register with HUC PAK Int Kernel output
1930             MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1931             MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1932             miLoadRegMemParams.presStoreBuffer = &m_resBrcDataBuffer;
1933             miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
1934             miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1935             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
1936 
            // Only the first pipe copies the value into the PAK statistics
            // buffer and the status buffer.
1937             if (IsFirstPipe())
1938             {
1939                 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
1940                 miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
1941                 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
1942                 miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
1943                 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1944                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
1945 
1946                 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1947                 miStoreRegMemParams.presStoreBuffer =  &m_encodeStatusBuf.resStatusBuffer;
1948                 miStoreRegMemParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
1949                 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1950                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
1951             }
1952        }
1953        else
1954        {
1955            // Write back the HCP image control register for RC6 may clean it out
1956            MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1957            MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1958            miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1959            miLoadRegMemParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
1960            miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1961            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
1962 
           // Store the register to the PAK statistics buffer ...
1963            MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1964            miStoreRegMemParams.presStoreBuffer = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
1965            miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1966            miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1967            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
1968 
           // ... and to the "last BRC pass" slot of the status buffer.
1969            MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1970            miStoreRegMemParams.presStoreBuffer =  &m_encodeStatusBuf.resStatusBuffer;
1971            miStoreRegMemParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
1972            miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1973            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
1974        }
1975     }
1976 
1977     if (IsFirstPipe() && IsFirstPass() && m_osInterface->bTagResourceSync)
1978     {
1979         // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
1980         // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
1981         // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
1982         // as long as Dec/VP/Enc won't depend on this PAK so soon.
1983 
1984         PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
1985         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
1986             m_osInterface,
1987             globalGpuContextSyncTagBuffer));
1988         CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
1989 
1990         MHW_MI_STORE_DATA_PARAMS params;
1991         params.pOsResource = globalGpuContextSyncTagBuffer;
1992         params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
1993         uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
        // Write the tag value minus one, clamped at zero.
1994         params.dwValue = (value > 0) ? (value - 1) : 0;
1995         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &params));
1996     }
1997 
1998     if (IsFirstPipe())
1999     {
2000         CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2001     }
2002 
2003     if (m_numPipe >= 2)
2004     {
2005         // clean up hw semaphore for BRC PAK pass sync, used only in single task phase.
2006         if (m_singleTaskPhaseSupported &&
2007             m_brcEnabled &&
2008             !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
2009         {
2010             MHW_MI_STORE_DATA_PARAMS storeDataParams;
2011             MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2012             storeDataParams.pOsResource      = &m_resBrcSemaphoreMem[currentPipe].sResource;
2013             storeDataParams.dwResourceOffset = 0;
2014             storeDataParams.dwValue = 0;
2015 
2016             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2017                 &cmdBuffer,
2018                 &storeDataParams));
2019         }
2020     }
2021 
    // HCP picture-level state commands follow.
2022     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelectCmd(&cmdBuffer));
2023 
2024     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpSurfaceStateCmds(&cmdBuffer));
2025 
2026     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer));
2027 
2028     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2029     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2030     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2031 
2032     MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2033     SetHcpQmStateParams(fqmParams, qmParams);
2034     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2035     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2036 
    // With BRC the picture state is executed from the BRC image state write
    // buffer as a second level batch; without BRC it is added directly.
2037     if (m_brcEnabled)
2038     {
2039         uint32_t picStateCmdOffset;
2040         if (m_hucPakStitchEnabled && m_numPipe >= 2)
2041         {
2042             //for non fist PAK pass, always use the 2nd HCP PIC STATE cmd buffer
2043             picStateCmdOffset = IsFirstPass() ? 0 : 1;
2044         }
2045         else
2046         {
2047             picStateCmdOffset = GetCurrentPass();
2048         }
2049 
2050         MHW_BATCH_BUFFER batchBuffer;
2051         MOS_ZeroMemory(&batchBuffer, sizeof(batchBuffer));
2052         batchBuffer.OsResource   = m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];
2053         batchBuffer.dwOffset     = picStateCmdOffset * BRC_IMG_STATE_SIZE_PER_PASS_G11;
2054         batchBuffer.bSecondLevel = true;
2055 
2056         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2057             &cmdBuffer,
2058             &batchBuffer));
2059     }
2060     else
2061     {
2062         CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPictureStateCmd(&cmdBuffer));
2063     }
2064 
2065     // Send HEVC_VP9_RDOQ_STATE command
2066     if (m_hevcRdoqEnabled)
2067     {
2068         MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2069         SetHcpPicStateParams(picStateParams);
2070 
2071         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2072     }
2073 
2074     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2075 
2076     return eStatus;
2077 }
2078 
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState)2079 void CodechalEncHevcStateG11::SetHcpSliceStateCommonParams(
2080     MHW_VDBOX_HEVC_SLICE_STATE& sliceState)
2081 {
2082     CodechalEncHevcState::SetHcpSliceStateCommonParams(sliceState);
2083 
2084     sliceState.RoundingIntra         = m_roundingIntraInUse;
2085     sliceState.RoundingInter         = m_roundingInterInUse;
2086 
2087     if ((m_hevcSliceParams->slice_type == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
2088         (m_hevcSliceParams->slice_type == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag))
2089     {
2090         sliceState.bWeightedPredInUse = true;
2091     }
2092     else
2093     {
2094         sliceState.bWeightedPredInUse = false;
2095     }
2096 
2097     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11 &>(sliceState).dwNumPipe = m_numPipe;
2098 }
2099 
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,bool lastSliceInTile,uint32_t idx)2100 void CodechalEncHevcStateG11::SetHcpSliceStateParams(
2101     MHW_VDBOX_HEVC_SLICE_STATE&         sliceState,
2102     PCODEC_ENCODER_SLCDATA              slcData,
2103     uint16_t                            slcCount,
2104     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11   tileCodingParams,
2105     bool                                lastSliceInTile,
2106     uint32_t                            idx)
2107 {
2108     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
2109 
2110     sliceState.pEncodeHevcSliceParams                                                     = &m_hevcSliceParams[slcCount];
2111     sliceState.dwDataBufferOffset           = slcData[slcCount].CmdOffset;
2112     sliceState.dwOffset                     = slcData[slcCount].SliceOffset;
2113     sliceState.dwLength                     = slcData[slcCount].BitSize;
2114     sliceState.uiSkipEmulationCheckCount    = slcData[slcCount].SkipEmulationByteCount;
2115     sliceState.dwSliceIndex                 = (uint32_t)slcCount;
2116     sliceState.bLastSlice                   = (slcCount == m_numSlices - 1);
2117     sliceState.bLastSliceInTile             = lastSliceInTile ? true : false;
2118     sliceState.bLastSliceInTileColumn       = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false;
2119     sliceState.bFirstPass                   = IsFirstPass();
2120     sliceState.bLastPass                    = IsLastPass();
2121     sliceState.bInsertBeforeSliceHeaders    = (slcCount == 0);
2122     sliceState.bSaoLumaFlag                                                               = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_luma_flag : 0;
2123     sliceState.bSaoChromaFlag                                                             = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_chroma_flag : 0;
2124     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).pTileCodingParams            = tileCodingParams + idx;
2125     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).dwTileID                     = idx;
2126 
2127     CalcTransformSkipParameters(sliceState.EncodeHevcTransformSkipParams);
2128 }
2129 
ExecuteSliceLevel()2130 MOS_STATUS CodechalEncHevcStateG11::ExecuteSliceLevel()
2131 {
2132     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2133 
2134     CODECHAL_ENCODE_FUNCTION_ENTER;
2135     CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
2136 
2137     if (m_pakOnlyTest)
2138     {
2139         CODECHAL_ENCODE_CHK_STATUS_RETURN(LoadPakCommandAndCuRecordFromFile());
2140     }
2141 
2142     if (!m_hevcPicParams->tiles_enabled_flag)
2143     {
2144         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::ExecuteSliceLevel());
2145     }
2146     else
2147     {
2148         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
2149     }
2150 
2151     return eStatus;
2152 }
2153 
EncTileLevel()2154 MOS_STATUS CodechalEncHevcStateG11::EncTileLevel()
2155 {
2156     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
2157 
2158     CODECHAL_ENCODE_FUNCTION_ENTER;
2159 
2160     int32_t currentPipe = GetCurrentPipe();
2161     int32_t currentPass  = GetCurrentPass();
2162 
2163     if(currentPipe < 0 || currentPass < 0)
2164     {
2165         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
2166         return MOS_STATUS_INVALID_PARAMETER;
2167     }
2168 
2169     MHW_VDBOX_HEVC_SLICE_STATE_G11 sliceState;
2170     SetHcpSliceStateCommonParams(sliceState);
2171 
2172     MOS_COMMAND_BUFFER cmdBuffer;
2173     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2174 
2175     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2176     uint32_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
2177 
2178     for(uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2179     {
2180         for(uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2181         {
2182             PCODEC_ENCODER_SLCDATA  slcData = m_slcData;
2183             uint32_t                slcCount, idx, sliceNumInTile = 0;
2184 
2185             idx = tileRow * numTileColumns + tileCol;
2186 
2187             if ((m_numPipe > 1) && (tileCol != currentPipe))
2188             {
2189                 continue;
2190             }
2191 
2192             // HCP_TILE_CODING commmand
2193             CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG11*>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx]));
2194 
2195             for (slcCount = 0; slcCount < m_numSlices; slcCount++)
2196             {
2197                 bool lastSliceInTile = false, sliceInTile = false;
2198 
2199                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
2200                     &m_tileParams[idx],
2201                     &sliceInTile,
2202                     &lastSliceInTile));
2203 
2204                 if(!sliceInTile)
2205                 {
2206                     continue;
2207                 }
2208 
2209                 if (IsFirstPass())
2210                 {
2211                     uint32_t startLCU = 0;
2212                     for(uint32_t ii = 0; ii < slcCount; ii++)
2213                     {
2214                         startLCU += m_hevcSliceParams[ii].NumLCUsInSlice;
2215                     }
2216                     slcData[slcCount].CmdOffset = startLCU * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t);
2217                 }
2218 
2219                 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx);
2220 
2221                 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState));
2222 
2223                 sliceNumInTile++;
2224             } // end of slice
2225 
2226             if(0 == sliceNumInTile)
2227             {
2228                 // One tile must have at least one slice
2229                 CODECHAL_ENCODE_ASSERT(false);
2230                 eStatus = MOS_STATUS_INVALID_PARAMETER;
2231                 return eStatus;
2232             }
2233         } // end of row tile
2234     } // end of column tile
2235 
2236     // Insert end of sequence/stream if set
2237     if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
2238     {
2239         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2240         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2241         pakInsertObjectParams.bLastPicInSeq     = m_lastPicInSeq;
2242         pakInsertObjectParams.bLastPicInStream  = m_lastPicInStream;
2243         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2244     }
2245 
2246     // Send VD_PIPELINE_FLUSH command
2247     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2248     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2249     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2250     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2251     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2252     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2253 
2254     // Send MI_FLUSH command
2255     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2256     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2257     flushDwParams.bVideoPipelineCacheInvalidate = true;
2258     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2259 
2260     //HW Semaphore cmd to make sure all pipes completion encode
2261     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeCompleteSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2262 
2263     if(IsFirstPipe())
2264     {
2265         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2266             &m_resPipeCompleteSemaMem,
2267             &cmdBuffer,
2268             m_numPipe));
2269 
2270         //clean HW semaphore memory
2271         MHW_MI_STORE_DATA_PARAMS    storeDataParams;
2272         MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2273         storeDataParams.pOsResource      = &m_resPipeCompleteSemaMem;
2274         storeDataParams.dwValue          = 0;
2275         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2276             &cmdBuffer,
2277             &storeDataParams));
2278 
2279         // Use HW stitch commands only in the scalable mode
2280         if (m_numPipe > 1 && m_enableTileStitchByHW)
2281         {
2282             //call PAK Int Kernel in scalability case
2283             if (m_hucPakStitchEnabled)
2284             {
2285                 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
2286                 // 2nd level BB buffer for stitching cmd
2287                 // current location to add cmds in 2nd level batch buffer
2288                 m_HucStitchCmdBatchBuffer.iCurrent = 0;
2289                 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2290                 m_HucStitchCmdBatchBuffer.dwOffset = 0;
2291                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
2292                 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
2293                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
2294             }
2295         }
2296 
2297         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2298 
2299         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2300 
2301         if (m_numPipe <= 1)  // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2302         {
2303             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2304 
2305             // BRC PAK statistics different for each pass
2306             if (m_brcEnabled)
2307             {
2308                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
2309             }
2310         }
2311         else
2312         {   //scalability mode
2313             if (m_brcEnabled)
2314             {
2315                 //MMIO register is not used in scalability BRC case. all information is in TileSizeRecord stream out buffer
2316                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatisticsForScalability(&cmdBuffer));
2317             }
2318             else
2319             {
2320                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2321             }
2322         }
2323 
2324         #if (_DEBUG || _RELEASE_INTERNAL)
2325         //this is to support BRC scalbility test to match with single pipe. Will be removed later after enhanced BRC Scalability is enabled.
2326         if (m_brcEnabled && m_forceSinglePakPass)
2327         {
2328             CODECHAL_ENCODE_CHK_STATUS_RETURN(ResetImgCtrlRegInPAKStatisticsBuffer(&cmdBuffer));
2329         }
2330         #endif
2331 
2332         if (m_singleTaskPhaseSupported &&
2333             m_brcEnabled && m_numPipe >= 2 && !IsLastPass())
2334         {
2335             // Signal HW semaphore for the BRC dependency (i.e., next BRC pass waits for the current BRC pass)
2336             for (auto i = 0; i < m_numPipe; i++)
2337             {
2338                 if (!Mos_ResourceIsNull(&m_resBrcSemaphoreMem[i].sResource))
2339                 {
2340                     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2341                     storeDataParams.pOsResource      = &m_resBrcSemaphoreMem[i].sResource;
2342                     storeDataParams.dwValue          = 1;
2343 
2344                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2345                         &cmdBuffer,
2346                         &storeDataParams));
2347                 }
2348             }
2349         }
2350     }
2351 
2352     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2353     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2354 
2355     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2356     {
2357         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2358     }
2359 
2360     std::string pakPassName = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass));
2361     CODECHAL_DEBUG_TOOL(
2362         CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2363             &cmdBuffer,
2364             CODECHAL_NUM_MEDIA_STATES,
2365             pakPassName.data()));)
2366 
2367     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2368 
2369     if (IsFirstPipe() &&
2370         (m_pakOnlyTest == 0) &&  // In the PAK only test, no need to wait for ENC's completion
2371         IsFirstPass() &&
2372         !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2373     {
2374         MOS_SYNC_PARAMS syncParams      = g_cInitSyncParams;
2375         syncParams.GpuContext           = m_videoContext;
2376         syncParams.presSyncResource     = &m_resSyncObjectRenderContextInUse;
2377 
2378         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2379     }
2380 
2381     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2382     {
2383         bool nullRendering = m_videoContextUsesNullHw;
2384         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
2385 
2386         CODECHAL_DEBUG_TOOL(
2387             CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2388             CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpPakOutput());
2389             if (m_mmcState)
2390             {
2391                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2392             }
2393         )
2394 
2395         if ((IsLastPipe()) &&
2396             (IsLastPass()) &&
2397             m_signalEnc &&
2398             m_currRefSync &&
2399             !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2400         {
2401             // signal semaphore
2402             MOS_SYNC_PARAMS             syncParams;
2403             syncParams                  = g_cInitSyncParams;
2404             syncParams.GpuContext       = m_videoContext;
2405             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2406 
2407             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2408             m_currRefSync->uiSemaphoreObjCount++;
2409             m_currRefSync->bInUsed = true;
2410         }
2411     }
2412 
2413     // Reset parameters for next PAK execution
2414     if (IsLastPipe() && IsLastPass())
2415     {
2416         if (!m_singleTaskPhaseSupported)
2417         {
2418             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2419         }
2420 
2421         m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2422 
2423         if (m_hevcSeqParams->ParallelBRC)
2424         {
2425             m_brcBuffers.uiCurrBrcPakStasIdxForWrite =
2426                 (m_brcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2427         }
2428 
2429         m_newPpsHeader = 0;
2430         m_newSeqHeader = 0;
2431         m_frameNum++;
2432     }
2433 
2434     return eStatus;
2435 }
2436 
DecideEncodingPipeNumber()2437 MOS_STATUS CodechalEncHevcStateG11::DecideEncodingPipeNumber()
2438 {
2439     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
2440 
2441     CODECHAL_ENCODE_FUNCTION_ENTER;
2442 
2443     m_numPipe = m_numVdbox;
2444 
2445     uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2446 
2447     if (numTileColumns > m_numPipe)
2448     {
2449         m_numPipe = 1;
2450     }
2451 
2452     if (numTileColumns < m_numPipe)
2453     {
2454         if(numTileColumns >= 1 && numTileColumns <= 4)
2455         {
2456             m_numPipe = numTileColumns;
2457         }
2458         else
2459         {
2460             m_numPipe = 1;  // invalid tile column test cases and switch back to the single VDBOX mode
2461         }
2462     }
2463 
2464     m_useVirtualEngine = true;  //always use virtual engine interface for single pipe and scalability mode
2465 
2466     if (!m_forceScalability)
2467     {
2468         //resolution < 4K, always go with single pipe
2469         if (m_frameWidth * m_frameHeight < ENCODE_HEVC_4K_PIC_WIDTH * ENCODE_HEVC_4K_PIC_HEIGHT)
2470         {
2471             m_numPipe = 1;
2472         }
2473     }
2474 
2475     m_numUsedVdbox       = m_numPipe;
2476     m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
2477 
2478     if (m_scalabilityState)
2479     {
2480         // Create/ re-use a GPU context with 2 pipes
2481         m_scalabilityState->ucScalablePipeNum = m_numPipe;
2482     }
2483 
2484     return eStatus;
2485 }
2486 
PlatformCapabilityCheck()2487 MOS_STATUS CodechalEncHevcStateG11::PlatformCapabilityCheck()
2488 {
2489     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
2490 
2491     CODECHAL_ENCODE_FUNCTION_ENTER;
2492 
2493     CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
2494 
2495     if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2496     {
2497         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
2498             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2499     }
2500 
2501     if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_8K_PIC_WIDTH * ENCODE_HEVC_MAX_8K_PIC_HEIGHT)
2502     {
2503         eStatus = MOS_STATUS_INVALID_PARAMETER;
2504         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 8k not supported");
2505     }
2506 
2507     if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
2508         (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat &&
2509         Format_YUY2 == m_reconSurface.Format)
2510     {
2511         if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
2512             m_reconSurface.dwWidth < m_oriFrameWidth / 2)
2513         {
2514             return MOS_STATUS_INVALID_PARAMETER;
2515         }
2516     }
2517 
2518     // set RDOQ Intra blocks Threshold for Gen11+
2519     m_rdoqIntraTuThreshold = 0;
2520     if (m_hevcRdoqEnabled)
2521     {
2522         if (1 == m_hevcSeqParams->TargetUsage)
2523         {
2524             m_rdoqIntraTuThreshold = 0xffff;
2525         }
2526         else if (4 == m_hevcSeqParams->TargetUsage)
2527         {
2528             m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
2529             m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
2530         }
2531     }
2532 
2533     return eStatus;
2534 }
2535 
CheckSupportedFormat(PMOS_SURFACE surface)2536 bool CodechalEncHevcStateG11::CheckSupportedFormat(PMOS_SURFACE surface)
2537 {
2538     CODECHAL_ENCODE_FUNCTION_ENTER;
2539 
2540     bool isColorFormatSupported = false;
2541 
2542     if (nullptr == surface)
2543     {
2544         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
2545         return isColorFormatSupported;
2546     }
2547 
2548     switch (surface->Format)
2549     {
2550     case Format_NV12:
2551         isColorFormatSupported = IS_Y_MAJOR_TILE_FORMAT(surface->TileType);
2552         break;
2553     case Format_YUY2:
2554     case Format_YUYV:
2555     case Format_A8R8G8B8:
2556     case Format_P010:
2557     case Format_Y210:
2558         break;
2559     default:
2560         CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
2561         break;
2562     }
2563 
2564     return isColorFormatSupported;
2565 }
2566 
GetSystemPipeNumberCommon()2567 MOS_STATUS CodechalEncHevcStateG11::GetSystemPipeNumberCommon()
2568 {
2569     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2570 
2571     CODECHAL_ENCODE_FUNCTION_ENTER;
2572 
2573     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
2574     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2575 
2576     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
2577     statusKey = MOS_UserFeature_ReadValue_ID(
2578         nullptr,
2579         __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
2580         &userFeatureData,
2581         m_osInterface->pOsContext);
2582 
2583     bool disableScalability = true; // m_hwInterface->IsDisableScalability() default false
2584     if (statusKey == MOS_STATUS_SUCCESS)
2585     {
2586         disableScalability = userFeatureData.i32Data ? true : false;
2587     }
2588 
2589     MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
2590     CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
2591 
2592     if (gtSystemInfo && disableScalability == false)
2593     {
2594         // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
2595         m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
2596     }
2597     else
2598     {
2599         m_numVdbox = 1;
2600     }
2601 
2602     return eStatus;
2603 }
2604 
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)2605 MOS_STATUS CodechalEncHevcStateG11::HucPakIntegrate(
2606     PMOS_COMMAND_BUFFER cmdBuffer)
2607 {
2608     CODECHAL_ENCODE_FUNCTION_ENTER;
2609 
2610     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2611 
2612     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2613 
2614     CODECHAL_ENCODE_CHK_COND_RETURN(
2615         (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
2616         "ERROR - vdbox index exceed the maximum");
2617 
2618     auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
2619 
2620     // load kernel from WOPCM into L2 storage RAM
2621     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
2622     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
2623     imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
2624 
2625     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
2626 
2627     // pipe mode select
2628     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2629     pipeModeSelectParams.Mode = m_mode;
2630     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2631 
2632     // DMEM set
2633     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
2634     if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
2635     {
2636         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
2637     }
2638     else
2639     {
2640         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateCqp(&dmemParams));
2641     }
2642     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
2643 
2644     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
2645     if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
2646     {
2647         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
2648     }
2649     else
2650     {
2651         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateCqp(&virtualAddrParams));
2652     }
2653 
2654     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
2655 
2656     // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
2657     MHW_MI_STORE_DATA_PARAMS storeDataParams;
2658     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2659     storeDataParams.pOsResource = &m_resHucStatus2Buffer;
2660     storeDataParams.dwResourceOffset = 0;
2661     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
2662     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
2663 
2664     // Store HUC_STATUS2 register
2665     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
2666     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
2667     storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
2668     storeRegParams.dwOffset = sizeof(uint32_t);
2669     storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
2670     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
2671 
2672     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
2673 
2674     // wait Huc completion (use HEVC bit for now)
2675     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
2676     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
2677     vdPipeFlushParams.Flags.bFlushHEVC = 1;
2678     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
2679     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
2680 
2681     // Flush the engine to ensure memory written out
2682     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2683     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2684     flushDwParams.bVideoPipelineCacheInvalidate = true;
2685     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2686 
2687     EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
2688 
2689     uint32_t baseOffset =
2690         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
2691 
2692                                                                                              // Write HUC_STATUS mask
2693     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2694     storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
2695     storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
2696     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
2697     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2698         cmdBuffer,
2699         &storeDataParams));
2700 
2701     // store HUC_STATUS register
2702     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
2703     storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
2704     storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
2705     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
2706     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
2707         cmdBuffer,
2708         &storeRegParams));
2709 
2710     return eStatus;
2711 }
2712 
Initialize(CodechalSetting * settings)2713 MOS_STATUS CodechalEncHevcStateG11::Initialize(CodechalSetting * settings)
2714 {
2715     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2716 
2717     CODECHAL_ENCODE_FUNCTION_ENTER;
2718 
2719     // Common initialization
2720     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
2721 
2722     m_numDelay                              = 15; //Value suggested by HW team.
2723     m_bmeMethodTable                        = (uint8_t *)m_meMethod;
2724     m_b4XMeDistortionBufferSupported        = true;
2725     m_brcBuffers.dwBrcConstantSurfaceWidth  = HEVC_BRC_CONSTANT_SURFACE_WIDTH_G9;
2726     m_brcBuffers.dwBrcConstantSurfaceHeight = HEVC_BRC_CONSTANT_SURFACE_HEIGHT_G10;
2727     m_brcHistoryBufferSize = HEVC_BRC_HISTORY_BUFFER_SIZE_G11;
2728     m_maxNumSlicesSupported                 = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6;
2729     m_brcBuffers.dwBrcHcpPicStateSize       = BRC_IMG_STATE_SIZE_PER_PASS_G11 * CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES;
2730 
2731     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
2732     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2733     MOS_UserFeature_ReadValue_ID(
2734         nullptr,
2735         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
2736         &userFeatureData,
2737         m_osInterface->pOsContext);
2738     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
2739 
2740     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2741     MOS_UserFeature_ReadValue_ID(
2742         nullptr,
2743         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
2744         &userFeatureData,
2745         m_osInterface->pOsContext);
2746     // Region number must be greater than 1
2747     m_numberConcurrentGroup = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
2748 
2749     if (m_numberConcurrentGroup > 16)
2750     {
2751         // Region number cannot be larger than 16
2752         m_numberConcurrentGroup = 16;
2753     }
2754 
2755     // Subthread number used in the ENC kernel
2756     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2757     MOS_UserFeature_ReadValue_ID(
2758         nullptr,
2759         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID,
2760         &userFeatureData,
2761         m_osInterface->pOsContext);
2762     m_numberEncKernelSubThread = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
2763 
2764     if (m_numberEncKernelSubThread > m_hevcThreadTaskDataNum)
2765     {
2766         m_numberEncKernelSubThread = m_hevcThreadTaskDataNum; // support up to 2 sub-threads in one LCU64x64
2767     }
2768 
2769     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2770     MOS_UserFeature_ReadValue_ID(
2771         nullptr,
2772         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
2773         &userFeatureData,
2774         m_osInterface->pOsContext);
2775     m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
2776 
2777     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2778     MOS_UserFeature_ReadValue_ID(
2779         nullptr,
2780         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
2781         &userFeatureData,
2782         m_osInterface->pOsContext);
2783     m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
2784 
2785     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2786     MOS_UserFeature_ReadValue_ID(
2787         nullptr,
2788         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_ENCODE_SSE_ENABLE_ID,
2789         &userFeatureData,
2790         m_osInterface->pOsContext);
2791     m_sseSupported = userFeatureData.i32Data ? true : false;
2792 
2793     // Overriding the defaults here with 32 aligned dimensions
2794     // 2x Scaling WxH
2795     m_downscaledWidth2x                  =
2796         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth);
2797     m_downscaledHeight2x                 =
2798         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight);
2799 
2800     // HME Scaling WxH
2801     m_downscaledWidth4x                   =
2802         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth);
2803     m_downscaledHeight4x                  =
2804         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight);
2805     m_downscaledWidthInMb4x               =
2806         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
2807     m_downscaledHeightInMb4x              =
2808         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x);
2809 
2810     // SuperHME Scaling WxH
2811     m_downscaledWidth16x                  =
2812         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x);
2813     m_downscaledHeight16x                 =
2814         CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x);
2815     m_downscaledWidthInMb16x              =
2816         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
2817     m_downscaledHeightInMb16x             =
2818         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x);
2819 
2820     // UltraHME Scaling WxH
2821     m_downscaledWidth32x                  =
2822         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x);
2823     m_downscaledHeight32x                 =
2824         CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x);
2825     m_downscaledWidthInMb32x              =
2826         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
2827     m_downscaledHeightInMb32x             =
2828         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x);
2829 
2830     // disable MMCD if we enable Codechal dump. Because dump code changes the surface state from compressed to uncompressed,
2831     // this causes mis-match issue between dump is enabled or disabled.
2832     CODECHAL_DEBUG_TOOL(
2833         if (m_mmcState && m_debugInterface && m_debugInterface->m_dbgCfgHead){
2834             //m_mmcState->SetMmcDisabled();
2835         })
2836 
2837     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
2838 
2839     if (MOS_VE_SUPPORTED(m_osInterface))
2840     {
2841         m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
2842         CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
2843         //scalability initialize
2844         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
2845     }
2846 
2847     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
2848     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2849     statusKey = MOS_UserFeature_ReadValue_ID(
2850         nullptr,
2851         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
2852         &userFeatureData,
2853         m_osInterface->pOsContext);
2854      m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
2855 
2856      statusKey = MOS_STATUS_SUCCESS;
2857      MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2858     statusKey = MOS_UserFeature_ReadValue_ID(
2859         nullptr,
2860         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
2861         &userFeatureData,
2862         m_osInterface->pOsContext);
2863      m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
2864 
2865     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2866     statusKey = MOS_UserFeature_ReadValue_ID(
2867         nullptr,
2868         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_WP_SUPPORT_ID,
2869         &userFeatureData,
2870         m_osInterface->pOsContext);
2871     m_weightedPredictionSupported = userFeatureData.i32Data ? true : false;
2872 
2873 #if (_DEBUG || _RELEASE_INTERNAL)
2874     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2875     statusKey = MOS_UserFeature_ReadValue_ID(
2876         nullptr,
2877         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
2878         &userFeatureData,
2879         m_osInterface->pOsContext);
2880     m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
2881 
2882     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2883     statusKey = MOS_UserFeature_ReadValue_ID(
2884         nullptr,
2885         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_MEDIARESET_TEST_ID,
2886         &userFeatureData,
2887         m_osInterface->pOsContext);
2888     m_enableTestMediaReset = userFeatureData.i32Data ? true : false;
2889 
2890     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2891     MOS_UserFeature_ReadValue_ID(
2892         nullptr,
2893         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_FORCE_SCALABILITY_ID,
2894         &userFeatureData,
2895         m_osInterface->pOsContext);
2896     m_forceScalability = userFeatureData.i32Data ? true : false;
2897 
2898     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2899     MOS_UserFeature_ReadValue_ID(
2900         nullptr,
2901         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_INTERVAL_ID,
2902         &userFeatureData,
2903         m_osInterface->pOsContext);
2904     m_ltrInterval = (uint32_t)(userFeatureData.i32Data);
2905 
2906     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2907     MOS_UserFeature_ReadValue_ID(
2908         nullptr,
2909         __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_DISABLE_ID,
2910         &userFeatureData,
2911         m_osInterface->pOsContext);
2912     m_enableBrcLTR = (userFeatureData.i32Data) ? false : true;
2913 #endif
2914 
2915      if (m_codecFunction != CODECHAL_FUNCTION_PAK)
2916      {
2917          MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2918          MOS_UserFeature_ReadValue_ID(
2919              nullptr,
2920              __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
2921              &userFeatureData,
2922              m_osInterface->pOsContext);
2923          m_hmeSupported = (userFeatureData.i32Data) ? true : false;
2924 
2925          MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2926          MOS_UserFeature_ReadValue_ID(
2927              nullptr,
2928              __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
2929              &userFeatureData,
2930              m_osInterface->pOsContext);
2931          m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
2932 
2933          MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2934          MOS_UserFeature_ReadValue_ID(
2935              nullptr,
2936              __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID,
2937              &userFeatureData,
2938              m_osInterface->pOsContext);
2939          // Keeping UHME by Default ON for Gen11
2940          m_32xMeSupported = (userFeatureData.i32Data) ? false : true;
2941 
2942          MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2943          MOS_UserFeature_ReadValue_ID(
2944              nullptr,
2945              __MEDIA_USER_FEATURE_VALUE_HEVC_NUM_THREADS_PER_LCU_ID,
2946              &userFeatureData,
2947              m_osInterface->pOsContext);
2948          m_totalNumThreadsPerLcu = (uint16_t)userFeatureData.i32Data;
2949 
2950          if (m_totalNumThreadsPerLcu < m_minThreadsPerLcuB || m_totalNumThreadsPerLcu > m_maxThreadsPerLcuB)
2951          {
2952              return MOS_STATUS_INVALID_PARAMETER;
2953          }
2954      }
2955 
2956 
2957     if (m_frameWidth < 128 || m_frameHeight < 128)
2958     {
2959         m_16xMeSupported = false;
2960         m_32xMeSupported = false;
2961     }
2962 
2963     else if (m_frameWidth < 512 || m_frameHeight < 512)
2964     {
2965         m_32xMeSupported = false;
2966     }
2967 
2968     char    stringData[MOS_USER_CONTROL_MAX_DATA_SIZE];
2969     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2970     userFeatureData.StringData.pStringData = stringData;
2971     statusKey = MOS_UserFeature_ReadValue_ID(
2972         nullptr,
2973         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID,
2974         &userFeatureData,
2975         m_osInterface->pOsContext);
2976 
2977     if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
2978     {
2979         MOS_SecureStrcpy(m_pakOnlyDataFolder,
2980             sizeof(m_pakOnlyDataFolder) / sizeof(m_pakOnlyDataFolder[0]),
2981             stringData);
2982 
2983         uint32_t len = strlen(m_pakOnlyDataFolder);
2984         if (m_pakOnlyDataFolder[len - 1] == '\\')
2985         {
2986             m_pakOnlyDataFolder[len - 1] = 0;
2987         }
2988 
2989         m_pakOnlyTest = true;
2990         // PAK only mode does not need to init any kernel
2991     }
2992 
2993     return eStatus;
2994 }
2995 
LoadCosts(uint8_t sliceType,uint8_t qp)2996 void CodechalEncHevcStateG11::LoadCosts(uint8_t sliceType, uint8_t qp)
2997 {
2998     if (sliceType >= CODECHAL_HEVC_NUM_SLICE_TYPES)
2999     {
3000         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid slice type");
3001         sliceType = CODECHAL_HEVC_I_SLICE;
3002     }
3003 
3004     double  qpScale = 0.60;
3005     int32_t qpMinus12 = qp - 12;
3006     double lambda = sqrt(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0));
3007     uint8_t lcuIdx    = ((m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3) == 6) ? 1 : 0;
3008     m_lambdaRD = (uint16_t)(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0) * 4 + 0.5);
3009 
3010     m_modeCostCre[LUTCREMODE_INTRA_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3011     m_modeCostCre[LUTCREMODE_INTRA_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3012     m_modeCostCre[LUTCREMODE_INTRA_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3013     m_modeCostCre[LUTCREMODE_INTRA_CHROMA] = CRECOST(lambda, LUTMODEBITS_INTRA_CHROMA, lcuIdx, sliceType);
3014     m_modeCostCre[LUTCREMODE_INTER_32X32] = CRECOST(lambda, LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3015     m_modeCostCre[LUTCREMODE_INTER_32X16] = CRECOST(lambda, LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3016     m_modeCostCre[LUTCREMODE_INTER_16X16] = CRECOST(lambda, LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3017     m_modeCostCre[LUTCREMODE_INTER_16X8] = CRECOST(lambda, LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3018     m_modeCostCre[LUTCREMODE_INTER_8X8] = CRECOST(lambda, LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3019     m_modeCostCre[LUTCREMODE_INTER_BIDIR] = CRECOST(lambda, LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3020     m_modeCostCre[LUTCREMODE_INTER_SKIP] = CRECOST(lambda, LUTMODEBITS_INTER_SKIP, lcuIdx, sliceType);
3021     m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3022     m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_16X16, lcuIdx, sliceType);
3023     m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3024     m_modeCostCre[LUTCREMODE_INTRA_NONPRED] = CRECOST(lambda, LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3025 
3026     m_modeCostRde[LUTRDEMODE_INTRA_64X64] = RDEBITS62(LUTMODEBITS_INTRA_64X64, lcuIdx, sliceType);
3027     m_modeCostRde[LUTRDEMODE_INTRA_32X32] = RDEBITS62(LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3028     m_modeCostRde[LUTRDEMODE_INTRA_16X16] = RDEBITS62(LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3029     m_modeCostRde[LUTRDEMODE_INTRA_8X8] = RDEBITS62(LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3030     m_modeCostRde[LUTRDEMODE_INTRA_NXN] = RDEBITS62(LUTMODEBITS_INTRA_NXN, lcuIdx, sliceType);
3031     m_modeCostRde[LUTRDEMODE_INTRA_MPM] = RDEBITS62(LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3032     m_modeCostRde[LUTRDEMODE_INTRA_DC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_DC_32X32, lcuIdx, sliceType);
3033     m_modeCostRde[LUTRDEMODE_INTRA_DC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_DC_8X8, lcuIdx, sliceType);
3034     m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3035     m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3036     m_modeCostRde[LUTRDEMODE_INTER_BIDIR] = RDEBITS62(LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3037     m_modeCostRde[LUTRDEMODE_INTER_REFID] = RDEBITS62(LUTMODEBITS_INTER_REFID, lcuIdx, sliceType);
3038     m_modeCostRde[LUTRDEMODE_SKIP_64X64] = RDEBITS62(LUTMODEBITS_SKIP_64X64, lcuIdx, sliceType);
3039     m_modeCostRde[LUTRDEMODE_SKIP_32X32] = RDEBITS62(LUTMODEBITS_SKIP_32X32, lcuIdx, sliceType);
3040     m_modeCostRde[LUTRDEMODE_SKIP_16X16] = RDEBITS62(LUTMODEBITS_SKIP_16X16, lcuIdx, sliceType);
3041     m_modeCostRde[LUTRDEMODE_SKIP_8X8] = RDEBITS62(LUTMODEBITS_SKIP_8X8, lcuIdx, sliceType);
3042     m_modeCostRde[LUTRDEMODE_MERGE_64X64] = RDEBITS62(LUTMODEBITS_MERGE_64X64, lcuIdx, sliceType);
3043     m_modeCostRde[LUTRDEMODE_MERGE_32X32] = RDEBITS62(LUTMODEBITS_MERGE_32X32, lcuIdx, sliceType);
3044     m_modeCostRde[LUTRDEMODE_MERGE_16X16] = RDEBITS62(LUTMODEBITS_MERGE_16X16, lcuIdx, sliceType);
3045     m_modeCostRde[LUTRDEMODE_MERGE_8X8] = RDEBITS62(LUTMODEBITS_MERGE_8X8, lcuIdx, sliceType);
3046     m_modeCostRde[LUTRDEMODE_INTER_32X32] = RDEBITS62(LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3047     m_modeCostRde[LUTRDEMODE_INTER_32X16] = RDEBITS62(LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3048     m_modeCostRde[LUTRDEMODE_INTER_16X16] = RDEBITS62(LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3049     m_modeCostRde[LUTRDEMODE_INTER_16X8] = RDEBITS62(LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3050     m_modeCostRde[LUTRDEMODE_INTER_8X8] = RDEBITS62(LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3051     m_modeCostRde[LUTRDEMODE_TU_DEPTH_0] = RDEBITS62(LUTMODEBITS_TU_DEPTH_0, lcuIdx, sliceType);
3052     m_modeCostRde[LUTRDEMODE_TU_DEPTH_1] = RDEBITS62(LUTMODEBITS_TU_DEPTH_1, lcuIdx, sliceType);
3053 
3054     for (uint8_t i = 0; i < 8; i++)
3055     {
3056         m_modeCostRde[LUTRDEMODE_CBF + i] = RDEBITS62(LUTMODEBITS_CBF + i, lcuIdx, sliceType);
3057     }
3058 }
3059 
3060 // ------------------------------------------------------------------------------
3061 //| Purpose:    Setup curbe for HEVC MbEnc B Kernels
3062 //| Return:     N/A
3063 //------------------------------------------------------------------------------
SetCurbeMbEncKernel()3064 MOS_STATUS CodechalEncHevcStateG11::SetCurbeMbEncKernel()
3065 {
3066     uint32_t            curIdx = m_currRecycledBufIdx;
3067     MOS_LOCK_PARAMS lockFlags;
3068     MOS_STATUS      eStatus = MOS_STATUS_SUCCESS;
3069 
3070     uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3;  // Map TU 1,4,6 to 0,1,2
3071 
3072     // Initialize the CURBE data
3073     MBENC_CURBE curbe;
3074 
3075     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3076     {
3077         if(m_encodeParams.bMbQpDataEnabled)
3078         {
3079             curbe.QPType            = QP_TYPE_CU_LEVEL; // !< Even though CQP mode, as mbqpbuffer surface is updated with Application Qp map
3080             // !< QP type should be set to QP_TYPE_CU_LEVEL for mbenc kernel to consider this surface.
3081         }
3082         else
3083             curbe.QPType            = QP_TYPE_CONSTANT;
3084         curbe.ROIEnable  = m_hevcPicParams->NumROI ? true : false;
3085     }
3086     else
3087     {
3088         curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
3089     }
3090 
3091     // TU based settings
3092     curbe.EnableCu64Check        = m_tuSettings[EnableCu64CheckTuParam][tuMapping];
3093     curbe.MaxNumIMESearchCenter  = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];
3094     curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping];
3095     curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping];
3096     curbe.Dynamic64Order         = m_tuSettings[Dynamic64OrderTuParam][tuMapping];
3097     curbe.DynamicOrderTh         = m_tuSettings[DynamicOrderThTuParam][tuMapping];
3098     curbe.Dynamic64Enable        = m_tuSettings[Dynamic64EnableTuParam][tuMapping];
3099     curbe.Dynamic64Th            = m_tuSettings[Dynamic64ThTuParam][tuMapping];
3100     curbe.IncreaseExitThresh     = m_tuSettings[IncreaseExitThreshTuParam][tuMapping];
3101     curbe.IntraSpotCheck         = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping];
3102     curbe.Fake32Enable           = m_tuSettings[Fake32EnableTuParam][tuMapping];
3103     curbe.Dynamic64Min32         = m_tuSettings[Dynamic64Min32][tuMapping];
3104 
3105     curbe.FrameWidthInSamples   = m_frameWidth;
3106     curbe.FrameHeightInSamples  = m_frameHeight;
3107 
3108     curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3109     curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
3110     curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
3111     curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3112 
3113     curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc;
3114 
3115     curbe.TUDepthControl = curbe.MaxTransformDepthInter;
3116 
3117     int32_t sliceQp               = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3118     curbe.FrameQP                 = abs(sliceQp);
3119     curbe.FrameQPSign             = (sliceQp > 0) ? 0 : 1;
3120 
3121 #if 0 // no need in the optimized kernel because kernel does the table look-up
3122     LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp);
3123     curbe.DW4_ModeIntra32x32Cost      = m_modeCostCre[LUTCREMODE_INTRA_32X32];
3124     curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32];
3125 
3126     curbe.DW5_ModeIntra16x16Cost      = m_modeCostCre[LUTCREMODE_INTRA_16X16];
3127     curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16];
3128     curbe.DW5_ModeIntra8x8Cost        = m_modeCostCre[LUTCREMODE_INTRA_8X8];
3129     curbe.DW5_ModeIntraNonDC8x8Cost   = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8];
3130 
3131     curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED];
3132 
3133     curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA];
3134 
3135     curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM];
3136 
3137     curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0];
3138     curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1];
3139 
3140     curbe.DW14_IntraTU4x4CBFCost   = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4];
3141     curbe.DW14_IntraTU8x8CBFCost   = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8];
3142     curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16];
3143     curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32];
3144     curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD;
3145     curbe.DW17_IntraNonDC8x8Penalty   = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8];
3146     curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32];
3147 #endif
3148 
3149     curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1;
3150     curbe.NumofRowTile    = m_hevcPicParams->num_tile_rows_minus1 + 1;
3151     curbe.HMEFlag      = m_hmeSupported ? 3 : 0;
3152 
3153     curbe.MaxRefIdxL0  = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3154     curbe.MaxRefIdxL1  = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1;
3155     curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3156 
3157     // Check whether Last Frame is I frame or not
3158     if (m_frameNum == 0 || m_picHeightInMb == I_TYPE || (m_frameNum && m_lastPictureCodingType==I_TYPE))
3159     {
3160         // This is the flag to notify kernel not to use the history buffer
3161         curbe.LastFrameIsIntra    = true;
3162     }
3163     else
3164     {
3165         curbe.LastFrameIsIntra    = false;
3166     }
3167 
3168     curbe.SliceType             = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
3169     curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
3170     curbe.CollocatedFromL0Flag  = m_hevcSliceParams->collocated_from_l0_flag;
3171     curbe.theSameRefList        = m_sameRefList;
3172     curbe.IsLowDelay            = m_lowDelay;
3173     curbe.NumRefIdxL0           = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
3174     curbe.NumRefIdxL1           = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? 0 : (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1);
3175     if (m_hevcSeqParams->TargetUsage == 1)
3176     {
3177         // MaxNumMergeCand C Model uses 4 for TU1,
3178         // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3179         curbe.MaxNumMergeCand   = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4);
3180     }
3181     else
3182     {
3183         // MaxNumMergeCand C Model uses 2 for TU4 and TU7,
3184         // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3185        curbe.MaxNumMergeCand   = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2);
3186     }
3187 
3188     int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = { 0 }, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = {0};
3189     curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
3190     curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
3191     curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
3192     curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);
3193 
3194     curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
3195     curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
3196     curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
3197     curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);
3198 
3199     curbe.RefFrameWinHeight     = m_frameHeight;
3200     curbe.RefFrameWinWidth      = m_frameWidth;
3201 
3202     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::GetRoundingIntraInterToUse());
3203 
3204     curbe.RoundingInter      = (m_roundingInterInUse + 1) << 4;  // Should be an input from par in the cmodel (slice state)
3205     curbe.RoundingIntra      = (m_roundingIntraInUse + 1) << 4;  // Should be an input from par in the cmodel (slice state)
3206     curbe.RDEQuantRoundValue = (m_roundingInterInUse + 1) << 4;
3207 
3208     uint32_t gopB = m_hevcSeqParams->GopRefDist;
3209 
3210     curbe.CostScalingForRA = 1;  // default setting
3211 
3212     // get the min distance between current pic and ref pics
3213     uint32_t minPocDist     = 255;
3214     uint32_t costTableIndex = 0;
3215 
3216     if (curbe.SliceType == CODECHAL_ENCODE_HEVC_B_SLICE)
3217     {
3218         if (curbe.CostScalingForRA == 1)
3219         {
3220             for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++)
3221             {
3222                 if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist)
3223                     minPocDist = abs(tbRefListL0[ref]);
3224             }
3225             for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++)
3226             {
3227                 if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist)
3228                     minPocDist = abs(tbRefListL1[ref]);
3229             }
3230 
3231             if (gopB == 4)
3232             {
3233                 costTableIndex = minPocDist;
3234                 if (minPocDist == 4)
3235                     costTableIndex -= 1;
3236             }
3237             if (gopB == 8)
3238             {
3239                 costTableIndex = minPocDist + 3;
3240                 if (minPocDist == 4)
3241                     costTableIndex -= 1;
3242                 if (minPocDist == 8)
3243                     costTableIndex -= 4;
3244             }
3245         }
3246     }
3247     else if (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE)
3248     {
3249         costTableIndex = 8;
3250     }
3251     else
3252     {
3253         costTableIndex = 9;
3254     }
3255 
3256     curbe.CostTableIndex = costTableIndex;
3257 
3258     // the following fields are needed by the new optimized kernel in v052417
3259     curbe.Log2ParallelMergeLevel    = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
3260     curbe.MaxIntraRdeIter           = 1;
3261     curbe.CornerNeighborPixel       = 0;
3262     curbe.IntraNeighborAvailFlags   = 0;
3263     curbe.SubPelMode                = 3; // qual-pel search
3264     curbe.InterSADMeasure           = 2; // Haar transform
3265     curbe.IntraSADMeasure           = 2; // Haar transform
3266     curbe.IntraPrediction           = 0; // enable 32x32, 16x16, and 8x8 luma intra prediction
3267     curbe.RefIDCostMode             = 1; // 0: AVC and 1: linear method
3268     curbe.TUBasedCostSetting        = 0;
3269     curbe.ConcurrentGroupNum        = m_numberConcurrentGroup;
3270     curbe.NumofUnitInWaveFront      = m_numWavefrontInOneRegion;
3271     curbe.LoadBalenceEnable         = 0; // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe
3272     curbe.ThreadNumber              = MOS_MIN(2, m_numberEncKernelSubThread);
3273     curbe.Pic_init_qp_B             = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3274     curbe.Pic_init_qp_P             = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3275     curbe.Pic_init_qp_I             = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3276     curbe.WaveFrontSplitsEnable     = (m_numberConcurrentGroup == 1) ? false : true;
3277     curbe.SuperHME                  = m_16xMeSupported;
3278     curbe.UltraHME                  = m_32xMeSupported;
3279     curbe.PerBFrameQPOffset         = 0;
3280 
3281     switch (m_hevcSeqParams->TargetUsage)
3282     {
3283     case 1:
3284         curbe.Degree45              = 0;
3285         curbe.Break12Dependency     = 0;
3286         curbe.DisableTemporal16and8 = 0;
3287         break;
3288     case 4:
3289         curbe.Degree45              = 1;
3290         curbe.Break12Dependency     = 1;
3291         curbe.DisableTemporal16and8 = 0;
3292         break;
3293     default:
3294         curbe.Degree45              = 1;
3295         curbe.Break12Dependency     = 1;
3296         curbe.DisableTemporal16and8 = 1;
3297         break;
3298     }
3299 
3300     curbe.LongTermReferenceFlags_L0   = 0;
3301     for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++)
3302     {
3303         curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3304     }
3305     curbe.LongTermReferenceFlags_L1 = 0;
3306     for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++)
3307     {
3308         curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3309     }
3310 
3311     curbe.Stepping           = 0;
3312     curbe.Cu64SkipCheckOnly  = 0;
3313     curbe.Cu642Nx2NCheckOnly = 0;
3314     curbe.EnableCu64AmpCheck = 1;
3315     curbe.IntraSpeedMode     = 0; // 35 mode
3316     curbe.DisableIntraNxN    = 0;
3317 
3318 #if 0 //needed only when using A stepping on simu/emu
3319     curbe.Stepping = 1;
3320     curbe.TUDepthControl = 1;
3321     curbe.MaxTransformDepthInter = 1;
3322     curbe.MaxTransformDepthIntra = 0;
3323     curbe.Cu64SkipCheckOnly = 0;
3324     curbe.Cu642Nx2NCheckOnly = 1;
3325     curbe.EnableCu64AmpCheck = 0;
3326     curbe.DisableIntraNxN = 1;
3327     curbe.MaxNumMergeCand = 1;
3328 #endif
3329 
3330     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3331     lockFlags.WriteOnly = 1;
3332     auto buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
3333         m_osInterface,
3334         &m_encBCombinedBuffer1[curIdx].sResource,
3335         &lockFlags);
3336     CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
3337 
3338     if(curbe.Degree45)
3339     {
3340         MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
3341     }
3342     buf->Curbe = curbe;
3343 
3344     m_osInterface->pfnUnlockResource(
3345         m_osInterface,
3346         &m_encBCombinedBuffer1[curIdx].sResource);
3347 
3348     if(m_initEncConstTable)
3349     {
3350         // Initialize the Enc Constant Table surface
3351         MOS_LOCK_PARAMS lockFlags;
3352         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3353         lockFlags.WriteOnly = 1;
3354 
3355         auto data = (uint8_t*)m_osInterface->pfnLockResource(
3356             m_osInterface,
3357             &m_encConstantTableForB.sResource,
3358             &lockFlags);
3359         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3360 
3361         if (m_isMaxLcu64)
3362         {
3363             MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize,
3364                 (const void*)m_encLcu64ConstantDataLut, sizeof(m_encLcu64ConstantDataLut));
3365         }
3366         else
3367         {
3368             MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize,
3369                 (const void*)m_encLcu32ConstantDataLut, sizeof(m_encLcu32ConstantDataLut));
3370         }
3371 
3372         m_osInterface->pfnUnlockResource(
3373             m_osInterface,
3374             &m_encConstantTableForB.sResource);
3375         m_initEncConstTable = false;
3376     }
3377 
3378     // binding table index
3379     MBENC_COMBINED_BTI params;
3380     if (m_isMaxLcu64)
3381     {
3382         for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3383         {
3384             params.BTI_LCU64.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3385             params.BTI_LCU64.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3386             params.BTI_LCU64.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3387             params.BTI_LCU64.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y;
3388             params.BTI_LCU64.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3389             params.BTI_LCU64.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD;
3390             params.BTI_LCU64.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ;
3391             params.BTI_LCU64.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD;
3392             params.BTI_LCU64.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD;
3393             params.BTI_LCU64.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA;
3394             params.BTI_LCU64.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3395             params.BTI_LCU64.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3396             params.BTI_LCU64.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3397             params.BTI_LCU64.VME2XInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
3398         }
3399         params.BTI_LCU64.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE;
3400         params.BTI_LCU64.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1;
3401         params.BTI_LCU64.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2;
3402         params.BTI_LCU64.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3;
3403         params.BTI_LCU64.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3404         params.BTI_LCU64.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3405     }
3406     else
3407     {
3408         for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3409         {
3410             params.BTI_LCU32.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3411             params.BTI_LCU32.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3412             params.BTI_LCU32.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3413             params.BTI_LCU32.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y;
3414             params.BTI_LCU32.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3415             params.BTI_LCU32.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD;
3416             params.BTI_LCU32.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ;
3417             params.BTI_LCU32.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD;
3418             params.BTI_LCU32.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD;
3419             params.BTI_LCU32.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA;
3420             params.BTI_LCU32.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3421             params.BTI_LCU32.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3422             params.BTI_LCU32.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3423         }
3424         params.BTI_LCU32.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE;
3425         params.BTI_LCU32.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1;
3426         params.BTI_LCU32.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2;
3427         params.BTI_LCU32.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3;
3428         params.BTI_LCU32.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3429         params.BTI_LCU32.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3430     }
3431 
3432     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
3433     PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
3434     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3435         &params,
3436         kernelState->dwCurbeOffset,
3437         sizeof(params)));
3438 
3439     return eStatus;
3440 }
3441 
3442 // ------------------------------------------------------------------------------
3443 //| Purpose:    Setup curbe for HEVC BrcInitReset Kernel
3444 //| Return:     N/A
3445 //------------------------------------------------------------------------------
SetCurbeBrcInitReset(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)3446 MOS_STATUS CodechalEncHevcStateG11::SetCurbeBrcInitReset(
3447     CODECHAL_HEVC_BRC_KRNIDX  brcKrnIdx)
3448 {
3449     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
3450 
3451     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
3452 
3453     if (brcKrnIdx != CODECHAL_HEVC_BRC_INIT && brcKrnIdx != CODECHAL_HEVC_BRC_RESET)
3454     {
3455         CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
3456         return MOS_STATUS_INVALID_PARAMETER;
3457     }
3458 
3459     // Initialize the CURBE data
3460     BRC_INITRESET_CURBE curbe = m_brcInitResetCurbeInit;
3461 
3462     uint32_t   profileLevelMaxFrame = GetProfileLevelMaxFrameSize();
3463 
3464     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
3465         m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
3466         m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3467     {
3468         if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
3469         {
3470             CODECHAL_ENCODE_ASSERTMESSAGE("Initial VBV Buffer Fullness is zero\n");
3471             return MOS_STATUS_INVALID_PARAMETER;
3472         }
3473 
3474         if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
3475         {
3476             CODECHAL_ENCODE_ASSERTMESSAGE("VBV buffer size in bits is zero\n");
3477             return MOS_STATUS_INVALID_PARAMETER;
3478         }
3479     }
3480 
3481     curbe.DW0_ProfileLevelMaxFrame = profileLevelMaxFrame;
3482     curbe.DW1_InitBufFull          = m_hevcSeqParams->InitVBVBufferFullnessInBit;
3483     curbe.DW2_BufSize              = m_hevcSeqParams->VBVBufferSizeInBit;
3484     curbe.DW3_TargetBitRate        = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;  //DDI in Kbits
3485     curbe.DW4_MaximumBitRate       = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3486     curbe.DW5_MinimumBitRate = 0;
3487     curbe.DW6_FrameRateM           = m_hevcSeqParams->FrameRate.Numerator;
3488     curbe.DW7_FrameRateD           = m_hevcSeqParams->FrameRate.Denominator;
3489     curbe.DW8_BRCFlag = BRCINIT_IGNORE_PICTURE_HEADER_SIZE;  // always ignore the picture header size set in BRC Update curbe
3490     if (m_hevcPicParams->NumROI)
3491     {
3492         curbe.DW8_BRCFlag |=  BRCINIT_DISABLE_MBBRC; // BRC ROI need disable MBBRC logic in LcuBrc Kernel
3493     }
3494     else
3495     {
3496         curbe.DW8_BRCFlag |= (m_lcuBrcEnabled) ? 0 : BRCINIT_DISABLE_MBBRC;
3497     }
3498     curbe.DW8_BRCFlag |= (m_brcEnabled && m_numPipe > 1) ? BRCINIT_USEHUCBRC : 0;
3499 
3500     // For non-ICQ, ACQP Buffer always set to 1
3501     curbe.DW25_ACQPBuffer = 1;
3502 
3503     curbe.DW25_SlidingWindowSize = m_slidingWindowSize;
3504 
3505     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
3506     {
3507         curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3508         curbe.DW8_BRCFlag |= BRCINIT_ISCBR;
3509     }
3510     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
3511     {
3512         if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3513         {
3514             curbe.DW4_MaximumBitRate = 2 * curbe.DW3_TargetBitRate;
3515         }
3516         curbe.DW8_BRCFlag |= BRCINIT_ISVBR;
3517     }
3518     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3519     {
3520         curbe.DW8_BRCFlag |= BRCINIT_ISAVBR;
3521         // For AVBR, max bitrate = target bitrate,
3522         curbe.DW3_TargetBitRate  = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;  //DDI in Kbits
3523         curbe.DW4_MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3524     }
3525     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
3526     {
3527         curbe.DW8_BRCFlag |= BRCINIT_ISICQ;
3528         curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3529     }
3530     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
3531     {
3532         curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3533         curbe.DW8_BRCFlag |= BRCINIT_ISVCM;
3534     }
3535     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3536     {
3537         curbe.DW8_BRCFlag = BRCINIT_ISCQP;
3538     }
3539     else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR)
3540     {
3541         if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3542         {
3543             curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate; // Use max bit rate for HRD compliance
3544         }
3545         curbe.DW8_BRCFlag = curbe.DW8_BRCFlag | BRCINIT_ISQVBR | BRCINIT_ISVBR; // We need to make sure that VBR is used for QP determination.
3546         // use ICQQualityFactor to determine the larger Qp for each MB
3547         curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3548     }
3549 
3550     curbe.DW9_FrameWidth = m_oriFrameWidth;
3551     curbe.DW10_FrameHeight = m_oriFrameHeight;
3552     curbe.DW10_AVBRAccuracy    = m_usAvbrAccuracy;
3553     curbe.DW11_AVBRConvergence = m_usAvbrConvergence;
3554     curbe.DW12_NumberSlice = m_numSlices;
3555 
3556     /**********************************************************************
3557     In case of non-HB/BPyramid Structure
3558     BRC_Param_A = GopP
3559     BRC_Param_B = GopB
3560     In case of HB/BPyramid GOP Structure
3561     BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
3562     BRC Parameters set as follows as per CModel equation
3563     ***********************************************************************/
3564     // BPyramid GOP
3565     m_hevcSeqParams->GopRefDist = m_hevcSeqParams->GopRefDist == 0 ? 1 : m_hevcSeqParams->GopRefDist;
3566     if (m_hevcSeqParams->NumOfBInGop[1] != 0 || m_hevcSeqParams->NumOfBInGop[2] != 0)
3567     {
3568         curbe.DW8_BRCGopP      = ((m_hevcSeqParams->GopPicSize) / m_hevcSeqParams->GopRefDist);
3569         curbe.DW9_BRCGopB      = curbe.DW8_BRCGopP;
3570         curbe.DW13_BRCGopB1    = curbe.DW8_BRCGopP * 2;
3571         curbe.DW14_BRCGopB2    = ((m_hevcSeqParams->GopPicSize) - (curbe.DW8_BRCGopP) - (curbe.DW13_BRCGopB1) - (curbe.DW9_BRCGopB));
3572         // B1 Level GOP
3573         if (m_hevcSeqParams->NumOfBInGop[2] == 0)
3574         {
3575             curbe.DW14_MaxBRCLevel = 3;
3576         }
3577         // B2 Level GOP
3578         else
3579         {
3580             curbe.DW14_MaxBRCLevel = 4;
3581         }
3582     }
3583     // For Regular GOP - No BPyramid
3584     else
3585     {
3586         curbe.DW14_MaxBRCLevel = 1;
3587         curbe.DW8_BRCGopP      = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
3588         curbe.DW9_BRCGopB      = m_hevcSeqParams->GopPicSize - 1 - curbe.DW8_BRCGopP;
3589     }
3590 
3591     // Set dynamic thresholds
3592     double inputBitsPerFrame = (double)((double)curbe.DW4_MaximumBitRate * (double)curbe.DW7_FrameRateD);
3593     inputBitsPerFrame = (double)(inputBitsPerFrame / curbe.DW6_FrameRateM);
3594 
3595     if (curbe.DW2_BufSize < (uint32_t)inputBitsPerFrame * 4)
3596     {
3597         curbe.DW2_BufSize = (uint32_t)inputBitsPerFrame * 4;
3598     }
3599 
3600     if (curbe.DW1_InitBufFull == 0)
3601     {
3602         curbe.DW1_InitBufFull = 7 * curbe.DW2_BufSize / 8;
3603     }
3604     if (curbe.DW1_InitBufFull < (uint32_t)(inputBitsPerFrame * 2))
3605     {
3606         curbe.DW1_InitBufFull = (uint32_t)(inputBitsPerFrame * 2);
3607     }
3608     if (curbe.DW1_InitBufFull > curbe.DW2_BufSize)
3609     {
3610         curbe.DW1_InitBufFull = curbe.DW2_BufSize;
3611     }
3612 
3613     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3614     {
3615         // For AVBR, Buffer size =  2*Bitrate, InitVBV = 0.75 * BufferSize
3616         curbe.DW2_BufSize     = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3617         curbe.DW1_InitBufFull = (uint32_t)(0.75 * curbe.DW2_BufSize);
3618     }
3619 
3620 
3621     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3622     {
3623         curbe.DW15_LongTermInterval = 0; // no LTR for low delay brc
3624     }
3625     else
3626     {
3627         curbe.DW15_LongTermInterval = (m_enableBrcLTR && m_ltrInterval) ? m_ltrInterval : m_enableBrcLTR ? HEVC_BRC_LONG_TERM_REFRENCE_FLAG : 0;
3628     }
3629 
3630     double bpsRatio = ( (double) inputBitsPerFrame / ( (double)(curbe.DW2_BufSize) / 30));
3631     bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;
3632 
3633     curbe.DW19_DeviationThreshold0_PBframe = (uint32_t)(-50 * pow(0.90, bpsRatio));
3634     curbe.DW19_DeviationThreshold1_PBframe = (uint32_t)(-50 * pow(0.66, bpsRatio));
3635     curbe.DW19_DeviationThreshold2_PBframe = (uint32_t)(-50 * pow(0.46, bpsRatio));
3636     curbe.DW19_DeviationThreshold3_PBframe = (uint32_t)(-50 * pow(0.3, bpsRatio));
3637 
3638     curbe.DW20_DeviationThreshold4_PBframe = (uint32_t)(50 * pow(0.3, bpsRatio));
3639     curbe.DW20_DeviationThreshold5_PBframe = (uint32_t)(50 * pow(0.46, bpsRatio));
3640     curbe.DW20_DeviationThreshold6_PBframe = (uint32_t)(50 * pow(0.7, bpsRatio));
3641     curbe.DW20_DeviationThreshold7_PBframe = (uint32_t)(50 * pow(0.9, bpsRatio));
3642 
3643     curbe.DW21_DeviationThreshold0_VBRcontrol = (uint32_t)(-50 * pow(0.9, bpsRatio));
3644     curbe.DW21_DeviationThreshold1_VBRcontrol = (uint32_t)(-50 * pow(0.7, bpsRatio));
3645     curbe.DW21_DeviationThreshold2_VBRcontrol = (uint32_t)(-50 * pow(0.5, bpsRatio));
3646     curbe.DW21_DeviationThreshold3_VBRcontrol = (uint32_t)(-50 * pow(0.3, bpsRatio));
3647 
3648     curbe.DW22_DeviationThreshold4_VBRcontrol = (uint32_t)(100 * pow(0.4, bpsRatio));
3649     curbe.DW22_DeviationThreshold5_VBRcontrol = (uint32_t)(100 * pow(0.5, bpsRatio));
3650     curbe.DW22_DeviationThreshold6_VBRcontrol = (uint32_t)(100 * pow(0.75, bpsRatio));
3651     curbe.DW22_DeviationThreshold7_VBRcontrol = (uint32_t)(100 * pow(0.9, bpsRatio));
3652 
3653     curbe.DW23_DeviationThreshold0_Iframe = (uint32_t)(-50 * pow(0.8, bpsRatio));
3654     curbe.DW23_DeviationThreshold1_Iframe = (uint32_t)(-50 * pow(0.6, bpsRatio));
3655     curbe.DW23_DeviationThreshold2_Iframe = (uint32_t)(-50 * pow(0.34, bpsRatio));
3656     curbe.DW23_DeviationThreshold3_Iframe = (uint32_t)(-50 * pow(0.2, bpsRatio));
3657 
3658     curbe.DW24_DeviationThreshold4_Iframe = (uint32_t)(50 * pow(0.2, bpsRatio));
3659     curbe.DW24_DeviationThreshold5_Iframe = (uint32_t)(50 * pow(0.4, bpsRatio));
3660     curbe.DW24_DeviationThreshold6_Iframe = (uint32_t)(50 * pow(0.66, bpsRatio));
3661     curbe.DW24_DeviationThreshold7_Iframe = (uint32_t)(50 * pow(0.9, bpsRatio));
3662 
3663     curbe.DW26_RandomAccess = (m_hevcSeqParams->HierarchicalFlag && !m_hevcSeqParams->LowDelayMode) ? true : false;
3664 
3665     if (m_brcInit)
3666     {
3667         m_dBrcInitCurrentTargetBufFullInBits = curbe.DW1_InitBufFull;
3668     }
3669 
3670     m_brcInitResetBufSizeInBits      = curbe.DW2_BufSize;
3671     m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;
3672 
3673     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
3674     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3675         &curbe,
3676         kernelState->dwCurbeOffset,
3677         sizeof(curbe)));
3678 
3679     return eStatus;
3680 }
3681 
3682 // ------------------------------------------------------------------------------
3683 //| Purpose:    Setup curbe for HEVC BrcUpdate Kernel
3684 //| Return:     N/A
3685 //------------------------------------------------------------------------------
SetCurbeBrcUpdate(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)3686 MOS_STATUS CodechalEncHevcStateG11::SetCurbeBrcUpdate(
3687     CODECHAL_HEVC_BRC_KRNIDX    brcKrnIdx)
3688 {
3689     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
3690 
3691     if (brcKrnIdx != CODECHAL_HEVC_BRC_FRAME_UPDATE && brcKrnIdx != CODECHAL_HEVC_BRC_LCU_UPDATE)
3692     {
3693         CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not frame update or LCU update\n");
3694         return MOS_STATUS_INVALID_PARAMETER;
3695     }
3696 
3697     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
3698 
3699     // Initialize the CURBE data
3700     BRCUPDATE_CURBE curbe = m_brcUpdateCurbeInit;
3701 
3702     curbe.DW5_TargetSize_Flag = 0;
3703 
3704     if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits)
3705     {
3706         m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits;
3707         curbe.DW5_TargetSize_Flag = 1;
3708     }
3709 
3710     if (m_numSkipFrames)
3711     {
3712         // pass num/size of skipped frames to update BRC
3713         curbe.DW6_NumSkippedFrames = m_numSkipFrames;
3714         curbe.DW15_SizeOfSkippedFrames = m_sizeSkipFrames;
3715 
3716         // account for skipped frame in calculating CurrentTargetBufFullInBits
3717         m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
3718     }
3719 
3720     curbe.DW0_TargetSize  = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
3721     curbe.DW1_FrameNumber = m_storeData - 1; // Check if we can remove this is unused (set to 0)
3722 
3723     // BRC PAK statistic buffer from last frame, the encoded size includes header already.
3724     // in BRC Initreset kernel, curbe DW8_BRCFlag will always ignore picture header size, so no need to set picture header size here.
3725     curbe.DW2_PictureHeaderSize = 0;
3726 
3727     curbe.DW5_CurrFrameBrcLevel = m_currFrameBrcLevel;
3728     curbe.DW5_MaxNumPAKs        = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses();
3729 
3730     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3731     {
3732         curbe.DW6_CqpValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3733     }
3734     if (m_hevcPicParams->NumROI)
3735     {
3736         curbe.DW6_ROIEnable    = m_brcEnabled ? false : true;
3737         curbe.DW6_BRCROIEnable = m_brcEnabled ? true : false;
3738         curbe.DW6_RoiRatio     = CalculateROIRatio();
3739     }
3740     curbe.DW6_SlidingWindowEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
3741 
3742     //for low delay brc
3743     curbe.DW6_LowDelayEnable      = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW);
3744     curbe.DW16_UserMaxFrameSize   = GetProfileLevelMaxFrameSize();
3745 
3746     curbe.DW14_ParallelMode       = m_hevcSeqParams->ParallelBRC;
3747 
3748     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3749     {
3750         curbe.DW3_StartGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
3751         curbe.DW3_StartGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
3752         curbe.DW4_StartGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
3753         curbe.DW4_StartGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);
3754 
3755         curbe.DW11_gRateRatioThreshold0 =
3756             (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40)));
3757         curbe.DW11_gRateRatioThreshold1 =
3758             (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75)));
3759         curbe.DW12_gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97)));
3760         curbe.DW12_gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100)));
3761         curbe.DW12_gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100)));
3762         curbe.DW12_gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100)));
3763     }
3764 
3765     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3766     {
3767         curbe.DW17_LongTerm_Current = 0; // no LTR for low delay brc
3768     }
3769     else
3770     {
3771         m_isFrameLTR = (CodecHal_PictureIsLongTermRef(m_currReconstructedPic));
3772         curbe.DW17_LongTerm_Current = (m_enableBrcLTR && m_isFrameLTR) ? 1 : 0;
3773     }
3774 
3775     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
3776     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3777         &curbe,
3778         kernelState->dwCurbeOffset,
3779         sizeof(curbe)));
3780 
3781     return eStatus;
3782 }
3783 
SendMbEncSurfacesKernel(PMOS_COMMAND_BUFFER cmdBuffer)3784 MOS_STATUS CodechalEncHevcStateG11::SendMbEncSurfacesKernel(
3785     PMOS_COMMAND_BUFFER cmdBuffer)
3786 {
3787     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
3788 
3789     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
3790     PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
3791 
3792     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
3793     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = m_isMaxLcu64 ? &m_mbEncKernelBindingTable[MBENC_LCU64_KRNIDX] : &m_mbEncKernelBindingTable[MBENC_LCU32_KRNIDX];
3794 
3795     PMOS_SURFACE    inputSurface = m_rawSurfaceToEnc;
3796     uint32_t   startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3797     CODECHAL_SURFACE_CODEC_PARAMS   surfaceCodecParams;
3798 
3799     // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map
3800     startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3801     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
3802         &surfaceCodecParams,
3803         &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
3804         MOS_BYTES_TO_DWORDS(m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize),
3805         0,
3806         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_BCOMBINED1_ENCODE].Value,
3807         bindingTable->dwBindingTableEntries[startBTI++],
3808         false));
3809 
3810     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3811         m_hwInterface,
3812         cmdBuffer,
3813         &surfaceCodecParams,
3814         kernelState));
3815 
3816     CODECHAL_DEBUG_TOOL(
3817         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3818             &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
3819             CodechalDbgAttr::attrOutput,
3820             "Hevc_CombinedBuffer1",
3821             m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
3822             0,
3823             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
3824     );
3825 
3826     // Combined 1D RAW buffer 2, which contains non fixed sizes of buffers
3827     startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3828     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
3829         &surfaceCodecParams,
3830         &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
3831         m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
3832         0,
3833         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_BCOMBINED2_ENCODE].Value,
3834         bindingTable->dwBindingTableEntries[startBTI++],
3835         false));
3836     surfaceCodecParams.bRawSurface = true;
3837 
3838     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3839         m_hwInterface,
3840         cmdBuffer,
3841         &surfaceCodecParams,
3842         kernelState));
3843 
3844     CODECHAL_DEBUG_TOOL(
3845         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3846             &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
3847             CodechalDbgAttr::attrOutput,
3848             "Hevc_CombinedBuffer2",
3849             m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
3850             0,
3851             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
3852     );
3853     // VME surfaces
3854     startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3855     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3856         &surfaceCodecParams,
3857         inputSurface,
3858         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
3859         bindingTable->dwBindingTableEntries[startBTI++]));
3860 
3861     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3862         m_hwInterface,
3863         cmdBuffer,
3864         &surfaceCodecParams,
3865         kernelState));
3866 
3867     for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
3868     {
3869         int32_t ll = 0;
3870         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
3871         if (!CodecHal_PictureIsInvalid(refPic) &&
3872             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
3873         {
3874             int32_t      idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
3875             PMOS_SURFACE refSurfacePtr;
3876             if (surface_idx == 0 && m_useWeightedSurfaceForL0)
3877             {
3878                 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx);
3879             }
3880             else
3881             {
3882                 refSurfacePtr = &m_refList[idx]->sRefBuffer;
3883             }
3884 
3885             // Picture Y VME
3886             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3887                 &surfaceCodecParams,
3888                 refSurfacePtr,
3889                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3890                 bindingTable->dwBindingTableEntries[startBTI++]));
3891 
3892             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3893                 m_hwInterface,
3894                 cmdBuffer,
3895                 &surfaceCodecParams,
3896                 kernelState));
3897 
3898             CODECHAL_DEBUG_TOOL(
3899                 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
3900                 std::string refSurfName      = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
3901                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
3902                     &m_refList[idx]->sRefBuffer,
3903                     CodechalDbgAttr::attrReferenceSurfaces,
3904                     refSurfName.data())));
3905         }
3906         else
3907         {
3908             // Providing Dummy surface as per VME requirement.
3909             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3910                 &surfaceCodecParams,
3911                 inputSurface,
3912                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3913                 bindingTable->dwBindingTableEntries[startBTI++]));
3914 
3915             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3916                 m_hwInterface,
3917                 cmdBuffer,
3918                 &surfaceCodecParams,
3919                 kernelState));
3920         }
3921 
3922         ll = 1;
3923         refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
3924         if (!CodecHal_PictureIsInvalid(refPic) &&
3925             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
3926         {
3927             int32_t      idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
3928             PMOS_SURFACE refSurfacePtr;
3929             if (surface_idx == 0 && m_useWeightedSurfaceForL1)
3930             {
3931                 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx);
3932             }
3933             else
3934             {
3935                 refSurfacePtr = &m_refList[idx]->sRefBuffer;
3936             }
3937 
3938             // Picture Y VME
3939             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3940                 &surfaceCodecParams,
3941                 refSurfacePtr,
3942                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3943                 bindingTable->dwBindingTableEntries[startBTI++]));
3944 
3945             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3946                 m_hwInterface,
3947                 cmdBuffer,
3948                 &surfaceCodecParams,
3949                 kernelState));
3950 
3951             CODECHAL_DEBUG_TOOL(
3952                 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
3953                 std::string refSurfName      = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
3954                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
3955                     &m_refList[idx]->sRefBuffer,
3956                     CodechalDbgAttr::attrReferenceSurfaces,
3957                     refSurfName.data())));
3958         }
3959         else
3960         {
3961             // Providing Dummy surface as per VME requirement.
3962             CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3963                 &surfaceCodecParams,
3964                 inputSurface,
3965                 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3966                 bindingTable->dwBindingTableEntries[startBTI++]));
3967 
3968             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3969                 m_hwInterface,
3970                 cmdBuffer,
3971                 &surfaceCodecParams,
3972                 kernelState));
3973         }
3974     }
3975 
3976     //Source Y and UV
3977     startBTI = MBENC_B_FRAME_CURR_Y;
3978     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
3979         &surfaceCodecParams,
3980         inputSurface,
3981         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
3982         bindingTable->dwBindingTableEntries[startBTI++],
3983         m_verticalLineStride,
3984         false));
3985 
3986     surfaceCodecParams.bUseUVPlane = true;
3987 
3988     surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI];
3989     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3990         m_hwInterface,
3991         cmdBuffer,
3992         &surfaceCodecParams,
3993         kernelState));
3994 
3995     CODECHAL_DEBUG_TOOL(
3996     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
3997         inputSurface,
3998         CodechalDbgAttr::attrEncodeRawInputSurface,
3999         "MbEnc_Input_SrcSurf")));
4000 
4001     // Current Y with reconstructed boundary pixels
4002     startBTI = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
4003     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4004         &surfaceCodecParams,
4005         &m_currPicWithReconBoundaryPix,
4006         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4007         bindingTable->dwBindingTableEntries[startBTI],
4008         m_verticalLineStride,
4009         true));
4010 
4011     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4012         m_hwInterface,
4013         cmdBuffer,
4014         &surfaceCodecParams,
4015         kernelState));
4016 
4017     // Enc CU Record
4018     startBTI = MBENC_B_FRAME_ENC_CU_RECORD;
4019     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4020         &surfaceCodecParams,
4021         &m_intermediateCuRecordSurfaceLcu32,
4022         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_CU_RECORD_ENCODE].Value,
4023         bindingTable->dwBindingTableEntries[startBTI],
4024         0,
4025         true));
4026 
4027     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4028         m_hwInterface,
4029         cmdBuffer,
4030         &surfaceCodecParams,
4031         kernelState));
4032 
4033     // PAK object command surface
4034     startBTI = MBENC_B_FRAME_PAK_OBJ;
4035     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4036         &surfaceCodecParams,
4037         &m_resMbCodeSurface,
4038         MOS_BYTES_TO_DWORDS(m_mvOffset),
4039         0,
4040         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4041         bindingTable->dwBindingTableEntries[startBTI],
4042         true));
4043 
4044     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4045         m_hwInterface,
4046         cmdBuffer,
4047         &surfaceCodecParams,
4048         kernelState));
4049 
4050     // CU packet for PAK surface
4051     startBTI = MBENC_B_FRAME_PAK_CU_RECORD;
4052     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4053         &surfaceCodecParams,
4054         &m_resMbCodeSurface,
4055         MOS_BYTES_TO_DWORDS(m_mbCodeSize - m_mvOffset),
4056         m_mvOffset,
4057         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_CU_PACKET_FOR_PAK_ENCODE].Value,
4058         bindingTable->dwBindingTableEntries[startBTI],
4059         true));
4060 
4061     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4062         m_hwInterface,
4063         cmdBuffer,
4064         &surfaceCodecParams,
4065         kernelState));
4066 
4067     //Software scoreboard surface
4068     startBTI = MBENC_B_FRAME_SW_SCOREBOARD;
4069     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4070         &surfaceCodecParams,
4071         m_swScoreboardState->GetCurSwScoreboardSurface(),
4072         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_SOFTWARE_SCOREBOARD_ENCODE].Value,
4073         bindingTable->dwBindingTableEntries[startBTI],
4074         m_verticalLineStride,
4075         true));
4076 
4077     surfaceCodecParams.bUse32UINTSurfaceFormat = true;
4078     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4079         m_hwInterface,
4080         cmdBuffer,
4081         &surfaceCodecParams,
4082         kernelState));
4083 
4084     // Scratch surface for Internal Use Only
4085     startBTI = MBENC_B_FRAME_SCRATCH_SURFACE;
4086     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4087         &surfaceCodecParams,
4088         &m_scratchSurface,
4089         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_SCRATCH_ENCODE].Value,
4090         bindingTable->dwBindingTableEntries[startBTI],
4091         m_verticalLineStride,
4092         true));
4093 
4094     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4095         m_hwInterface,
4096         cmdBuffer,
4097         &surfaceCodecParams,
4098         kernelState));
4099 
4100     // CU 16x16 QP data input surface
4101     startBTI = MBENC_B_FRAME_CU_QP_DATA;
4102     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4103         &surfaceCodecParams,
4104         &m_brcBuffers.sBrcMbQpBuffer,
4105         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4106         bindingTable->dwBindingTableEntries[startBTI],
4107         m_verticalLineStride,
4108         false));
4109 
4110     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4111         m_hwInterface,
4112         cmdBuffer,
4113         &surfaceCodecParams,
4114         kernelState));
4115 
4116     // Lcu level data input
4117     startBTI = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
4118     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4119         &surfaceCodecParams,
4120         &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
4121         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_LCU_LEVEL_DATA_ENCODE].Value,
4122         bindingTable->dwBindingTableEntries[startBTI],
4123         m_verticalLineStride,
4124         false));
4125 
4126     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4127         m_hwInterface,
4128         cmdBuffer,
4129         &surfaceCodecParams,
4130         kernelState));
4131 
4132     // Enc B 32x32 Constant Table surface
4133     startBTI = MBENC_B_FRAME_ENC_CONST_TABLE;
4134     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4135         &surfaceCodecParams,
4136         &m_encConstantTableForB.sResource,
4137         MOS_BYTES_TO_DWORDS(m_encConstantTableForB.dwSize),
4138         0,
4139         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_CONSTANT_TABLE_ENCODE].Value,
4140         bindingTable->dwBindingTableEntries[startBTI],
4141         false));
4142 
4143     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4144         m_hwInterface,
4145         cmdBuffer,
4146         &surfaceCodecParams,
4147         kernelState));
4148 
4149     // Colocated CU Motion Vector Data surface
4150     startBTI = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
4151     uint8_t   mbCodeIdxForTempMVP = 0xFF;
4152     if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
4153     {
4154         uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
4155 
4156         mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
4157     }
4158 
4159     if(m_pictureCodingType == I_TYPE)
4160     {
4161         // No temoporal MVP in the I frame
4162         m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4163     }
4164     else
4165     {
4166         if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
4167         {
4168             // Temporal reference MV index is invalid and so disable the temporal MVP
4169             CODECHAL_ENCODE_ASSERT(false);
4170             m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4171         }
4172     }
4173 
4174     if (mbCodeIdxForTempMVP == 0xFF)
4175     {
4176         startBTI++;
4177     }
4178     else
4179     {
4180         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4181             &surfaceCodecParams,
4182             m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP),
4183             MOS_BYTES_TO_DWORDS(m_sizeOfMvTemporalBuffer),
4184             0,
4185             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_MV_TEMPORAL_BUFFER_ENCODE].Value,
4186             bindingTable->dwBindingTableEntries[startBTI++],
4187             false));
4188 
4189         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4190             m_hwInterface,
4191             cmdBuffer,
4192             &surfaceCodecParams,
4193             kernelState));
4194     }
4195 
4196     startBTI = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
4197 
4198     // HME motion predictor data
4199     if (m_hmeEnabled)
4200     {
4201         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4202             &surfaceCodecParams,
4203             m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer),
4204             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value,
4205             bindingTable->dwBindingTableEntries[startBTI++],
4206             m_verticalLineStride,
4207             false));
4208 
4209         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4210             m_hwInterface,
4211             cmdBuffer,
4212             &surfaceCodecParams,
4213             kernelState));
4214     }
4215     else
4216     {
4217         startBTI++;
4218     }
4219 
4220     // Brc Combined Enc parameter surface
4221     startBTI = MBENC_B_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
4222     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4223         &surfaceCodecParams,
4224         &m_brcInputForEncKernelBuffer->sResource,
4225         MOS_BYTES_TO_DWORDS(HEVC_FRAMEBRC_BUF_CONST_SIZE),
4226         0,
4227         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_COMBINED_ENC_ENCODE].Value,
4228         bindingTable->dwBindingTableEntries[startBTI++],
4229         false));
4230 
4231     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4232         m_hwInterface,
4233         cmdBuffer,
4234         &surfaceCodecParams,
4235         kernelState));
4236 
4237     startBTI = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
4238     if (m_isMaxLcu64)
4239     {
4240         PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4241 
4242         //VME 2X Inter prediction surface for current frame
4243         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4244             &surfaceCodecParams,
4245             currScaledSurface2x,
4246             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4247             bindingTable->dwBindingTableEntries[startBTI++]));
4248 
4249         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4250             m_hwInterface,
4251             cmdBuffer,
4252             &surfaceCodecParams,
4253             kernelState));
4254 
4255         CODECHAL_DEBUG_TOOL(
4256             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4257                 currScaledSurface2x,
4258                 CodechalDbgAttr::attrReferenceSurfaces,
4259                 "2xScaledSurf")));
4260 
4261         // RefFrame's 2x DS surface
4262         for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
4263         {
4264             int32_t ll = 0;
4265             CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4266             if (!CodecHal_PictureIsInvalid(refPic) &&
4267                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4268             {
4269                 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4270 
4271                 // Picture Y VME
4272                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4273                     &surfaceCodecParams,
4274                     m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4275                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4276                     bindingTable->dwBindingTableEntries[startBTI++]));
4277 
4278                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4279                     m_hwInterface,
4280                     cmdBuffer,
4281                     &surfaceCodecParams,
4282                     kernelState));
4283 
4284                 CODECHAL_DEBUG_TOOL(
4285                     m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4286                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4287                         m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4288                         CodechalDbgAttr::attrReferenceSurfaces,
4289                         "Ref2xScaledSurf")));
4290             }
4291             else
4292             {
4293                 // Providing Dummy surface as per VME requirement.
4294                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4295                     &surfaceCodecParams,
4296                     currScaledSurface2x,
4297                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4298                     bindingTable->dwBindingTableEntries[startBTI++]));
4299 
4300                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4301                     m_hwInterface,
4302                     cmdBuffer,
4303                     &surfaceCodecParams,
4304                     kernelState));
4305             }
4306 
4307             ll = 1;
4308             refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4309             if (!CodecHal_PictureIsInvalid(refPic) &&
4310                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4311             {
4312                 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4313 
4314                 // Picture Y VME
4315                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4316                     &surfaceCodecParams,
4317                     m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4318                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4319                     bindingTable->dwBindingTableEntries[startBTI++]));
4320 
4321                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4322                     m_hwInterface,
4323                     cmdBuffer,
4324                     &surfaceCodecParams,
4325                     kernelState));
4326 
4327                 CODECHAL_DEBUG_TOOL(
4328                     m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4329                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4330                         m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4331                         CodechalDbgAttr::attrReferenceSurfaces,
4332                         "Ref2xScaledSurf")));
4333             }
4334             else
4335             {
4336                 // Providing Dummy surface as per VME requirement.
4337                 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4338                     &surfaceCodecParams,
4339                     currScaledSurface2x,
4340                     m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4341                     bindingTable->dwBindingTableEntries[startBTI++]));
4342 
4343                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4344                     m_hwInterface,
4345                     cmdBuffer,
4346                     &surfaceCodecParams,
4347                     kernelState));
4348             }
4349         }
4350     }
4351 
4352     // Kernel debug surface
4353     startBTI = MBENC_B_FRAME_DEBUG_SURFACE;
4354     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++, startBTI++)
4355     {
4356         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4357             &surfaceCodecParams,
4358             &m_debugSurface[i].sResource,
4359             MOS_BYTES_TO_DWORDS(m_debugSurface[i].dwSize),
4360             0,
4361             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_DEBUG_ENCODE].Value,
4362             bindingTable->dwBindingTableEntries[startBTI],
4363             false));
4364 
4365         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4366             m_hwInterface,
4367             cmdBuffer,
4368             &surfaceCodecParams,
4369             kernelState));
4370     }
4371 
4372     return eStatus;
4373 }
4374 
SendBrcInitResetSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,CODECHAL_HEVC_BRC_KRNIDX krnIdx)4375 MOS_STATUS CodechalEncHevcStateG11::SendBrcInitResetSurfaces(
4376     PMOS_COMMAND_BUFFER         cmdBuffer,
4377     CODECHAL_HEVC_BRC_KRNIDX    krnIdx)
4378 {
4379     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4380 
4381     if (krnIdx != CODECHAL_HEVC_BRC_INIT && krnIdx != CODECHAL_HEVC_BRC_RESET)
4382     {
4383         CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
4384         return MOS_STATUS_INVALID_PARAMETER;
4385     }
4386 
4387     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[krnIdx];
4388     uint32_t startBTI = 0;
4389     CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4390     // BRC History Buffer
4391     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4392         &surfaceCodecParams,
4393         &m_brcBuffers.resBrcHistoryBuffer,
4394         MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
4395         0,
4396         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_HISTORY_ENCODE].Value,
4397         bindingTable->dwBindingTableEntries[startBTI++],
4398         true));
4399 
4400     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[krnIdx];
4401     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4402         m_hwInterface,
4403         cmdBuffer,
4404         &surfaceCodecParams,
4405         kernelState));
4406 
4407     // BRC Distortion surface
4408     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4409         &surfaceCodecParams,
4410         m_brcDistortion,
4411         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
4412         bindingTable->dwBindingTableEntries[startBTI++],
4413         0,
4414         true));
4415     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4416         m_hwInterface,
4417         cmdBuffer,
4418         &surfaceCodecParams,
4419         kernelState));
4420 
4421     return eStatus;
4422 }
4423 
SetupBrcConstantTable(PMOS_SURFACE brcConstantData)4424 MOS_STATUS CodechalEncHevcStateG11::SetupBrcConstantTable(
4425     PMOS_SURFACE    brcConstantData)
4426 {
4427     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4428 
4429     CODECHAL_ENCODE_FUNCTION_ENTER;
4430 
4431     MOS_LOCK_PARAMS lockFlags;
4432     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4433     lockFlags.WriteOnly = 1;
4434     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags);
4435     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4436 
4437     uint32_t size = brcConstantData->dwHeight * brcConstantData->dwWidth;
4438     // 576-byte of Qp adjust table
4439     MOS_SecureMemcpy(data, size, g_cInit_HEVC_BRC_QP_ADJUST, sizeof(g_cInit_HEVC_BRC_QP_ADJUST));
4440     data += sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
4441     size -= sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
4442 
4443     //lambda and mode cost
4444     if (m_isMaxLcu64)
4445     {
4446         MOS_SecureMemcpy(data, size, m_brcLcu64x64LambdaModeCostInit, sizeof(m_brcLcu64x64LambdaModeCostInit));
4447     }
4448     else
4449     {
4450         MOS_SecureMemcpy(data, size, m_brcLcu32x32LambdaModeCostInit, sizeof(m_brcLcu32x32LambdaModeCostInit));
4451     }
4452     data += m_brcLambdaModeCostTableSize;
4453     size -= m_brcLambdaModeCostTableSize;
4454 
4455     m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);
4456 
4457     return eStatus;
4458 }
4459 
SendBrcFrameUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)4460 MOS_STATUS CodechalEncHevcStateG11::SendBrcFrameUpdateSurfaces(
4461     PMOS_COMMAND_BUFFER cmdBuffer)
4462 {
4463     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4464 
4465     // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
4466     PMOS_RESOURCE            brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
4467     MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
4468     mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams;
4469     mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams;
4470     mhwHevcPicState.bUseVDEnc = m_vdencEnabled ? 1 : 0;
4471     mhwHevcPicState.sseEnabledInVmeEncode = m_sseEnabled;
4472     mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses();
4473     mhwHevcPicState.rhodomainRCEnable = m_brcEnabled && (m_numPipe > 1);
4474     mhwHevcPicState.bSAOEnable = m_hevcSeqParams->SAO_enabled_flag ? (m_hevcSliceParams->slice_sao_luma_flag || m_hevcSliceParams->slice_sao_chroma_flag) : 0;
4475     // disable RDOQ before we get enough quality/perf data for BRC to prove its goodness
4476     //mhwHevcPicState.bHevcRdoqEnabled      = m_hevcRdoqEnabled;
4477     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
4478 
4479     PMOS_SURFACE brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
4480     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
4481 
4482     uint32_t startBTI = 0;
4483     PMHW_KERNEL_STATE                      kernelState  = &m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE];
4484     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_FRAME_UPDATE];
4485     CODECHAL_SURFACE_CODEC_PARAMS   surfaceCodecParams;
4486 
4487     // BRC History Buffer
4488     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4489         &surfaceCodecParams,
4490         &m_brcBuffers.resBrcHistoryBuffer,
4491         MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
4492         0,
4493         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_HISTORY_ENCODE].Value,
4494         bindingTable->dwBindingTableEntries[startBTI++],
4495         true));
4496     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4497         m_hwInterface,
4498         cmdBuffer,
4499         &surfaceCodecParams,
4500         kernelState));
4501 
4502     // BRC Prev PAK statistics output buffer
4503     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4504         &surfaceCodecParams,
4505         &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
4506         MOS_BYTES_TO_DWORDS(m_hevcBrcPakStatisticsSize),
4507         0,
4508         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_PAK_STATS_ENCODE].Value,
4509         bindingTable->dwBindingTableEntries[startBTI++],
4510         false));
4511     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4512         m_hwInterface,
4513         cmdBuffer,
4514         &surfaceCodecParams,
4515         kernelState));
4516 
4517     // BRC HCP_PIC_STATE read
4518     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4519         &surfaceCodecParams,
4520         brcHcpStateReadBuffer,
4521         MOS_BYTES_TO_DWORDS(m_brcBuffers.dwBrcHcpPicStateSize),
4522         0,
4523         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_PIC_STATE_READ_ENCODE].Value,
4524         bindingTable->dwBindingTableEntries[startBTI++],
4525         false));
4526     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4527         m_hwInterface,
4528         cmdBuffer,
4529         &surfaceCodecParams,
4530         kernelState));
4531 
4532     // BRC HCP_PIC_STATE write
4533     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4534         &surfaceCodecParams,
4535         &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
4536         MOS_BYTES_TO_DWORDS(m_brcBuffers.dwBrcHcpPicStateSize),
4537         0,
4538         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_PIC_STATE_WRITE_ENCODE].Value,
4539         bindingTable->dwBindingTableEntries[startBTI++],
4540         true));
4541     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4542         m_hwInterface,
4543         cmdBuffer,
4544         &surfaceCodecParams,
4545         kernelState));
4546 
4547     // Combined ENC-parameter buffer
4548     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4549         &surfaceCodecParams,
4550         &m_brcInputForEncKernelBuffer->sResource,
4551         MOS_BYTES_TO_DWORDS(HEVC_FRAMEBRC_BUF_CONST_SIZE),
4552         0,
4553         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_COMBINED_ENC_ENCODE].Value,
4554         bindingTable->dwBindingTableEntries[startBTI++],
4555         true));
4556     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4557         m_hwInterface,
4558         cmdBuffer,
4559         &surfaceCodecParams,
4560         kernelState));
4561 
4562     // BRC Distortion surface
4563     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4564         &surfaceCodecParams,
4565         m_brcDistortion,
4566         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
4567         bindingTable->dwBindingTableEntries[startBTI++],
4568         0,
4569         true));
4570     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4571         m_hwInterface,
4572         cmdBuffer,
4573         &surfaceCodecParams,
4574         kernelState));
4575 
4576     // BRC Data surface
4577     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4578         &surfaceCodecParams,
4579         brcConstantData,
4580         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_CONSTANT_DATA_ENCODE].Value,
4581         bindingTable->dwBindingTableEntries[startBTI++],
4582         0,
4583         false));
4584     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4585         m_hwInterface,
4586         cmdBuffer,
4587         &surfaceCodecParams,
4588         kernelState));
4589 
4590     // Pixel MB Statistics surface
4591     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4592         &surfaceCodecParams,
4593         &m_resMbStatsBuffer,
4594         MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize),
4595         0,
4596         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE].Value,
4597         bindingTable->dwBindingTableEntries[startBTI++],
4598         false));
4599     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4600         m_hwInterface,
4601         cmdBuffer,
4602         &surfaceCodecParams,
4603         kernelState));
4604 
4605     // Mv and Distortion summation surface
4606     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4607         &surfaceCodecParams,
4608         &m_mvAndDistortionSumSurface.sResource,
4609         MOS_BYTES_TO_DWORDS(m_mvAndDistortionSumSurface.dwSize),
4610         0,
4611         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DISTORTION_ENCODE].Value,
4612         bindingTable->dwBindingTableEntries[startBTI++],
4613         false));
4614     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4615         m_hwInterface,
4616         cmdBuffer,
4617         &surfaceCodecParams,
4618         kernelState));
4619 
4620     CODECHAL_DEBUG_TOOL(
4621         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4622             &m_mvAndDistortionSumSurface.sResource,
4623             CodechalDbgAttr::attrInput,
4624             "MvDistSum",
4625             m_mvAndDistortionSumSurface.dwSize,
4626             0,
4627             CODECHAL_MEDIA_STATE_BRC_UPDATE));
4628         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4629             &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
4630             CodechalDbgAttr::attrInput,
4631             "ImgStateRead",
4632             BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
4633             0,
4634             CODECHAL_MEDIA_STATE_BRC_UPDATE));
4635 
4636         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
4637             &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
4638             CodechalDbgAttr::attrInput,
4639             "ConstData",
4640             CODECHAL_MEDIA_STATE_BRC_UPDATE));
4641 
4642         // PAK statistics buffer is only dumped for BrcUpdate kernel input
4643         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4644             &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
4645             CodechalDbgAttr::attrInput,
4646             "PakStats",
4647             HEVC_BRC_PAK_STATISTCS_SIZE,
4648             0,
4649             CODECHAL_MEDIA_STATE_BRC_UPDATE));
4650         // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces
4651         if (m_brcDistortion) {
4652             CODECHAL_ENCODE_CHK_STATUS_RETURN(
4653                 m_debugInterface->DumpBuffer(
4654                     &m_brcDistortion->OsResource,
4655                     CodechalDbgAttr::attrInput,
4656                     "BrcDist_BeforeFrameBrc",
4657                     m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
4658                     m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
4659                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
4660         }
4661 
4662         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4663             &m_brcBuffers.resBrcHistoryBuffer,
4664             CodechalDbgAttr::attrInput,
4665             "HistoryRead_beforeFramBRC",
4666             m_brcHistoryBufferSize,
4667             0,
4668             CODECHAL_MEDIA_STATE_BRC_UPDATE));
4669         if (m_brcBuffers.pMbEncKernelStateInUse) {
4670             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
4671                 CODECHAL_MEDIA_STATE_BRC_UPDATE,
4672                 m_brcBuffers.pMbEncKernelStateInUse));
4673         } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer,
4674             CodechalDbgAttr::attrInput,
4675             "MBStatsSurf",
4676             m_hwInterface->m_avcMbStatBufferSize,
4677             0,
4678             CODECHAL_MEDIA_STATE_BRC_UPDATE));)
4679     return eStatus;
4680 }
4681 
SendBrcLcuUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)4682 MOS_STATUS CodechalEncHevcStateG11::SendBrcLcuUpdateSurfaces(
4683     PMOS_COMMAND_BUFFER cmdBuffer)
4684 {
4685     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4686 
4687     PMHW_KERNEL_STATE                      kernelState  = &m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE];
4688     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_LCU_UPDATE];
4689     uint32_t startBTI = 0;
4690     CODECHAL_SURFACE_CODEC_PARAMS   surfaceCodecParams;
4691 
4692     if (m_brcEnabled)
4693     {
4694         // BRC History Buffer
4695         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4696             &surfaceCodecParams,
4697             &m_brcBuffers.resBrcHistoryBuffer,
4698             MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
4699             0,
4700             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_HISTORY_ENCODE].Value,
4701             bindingTable->dwBindingTableEntries[startBTI++],
4702             true));
4703         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4704             m_hwInterface,
4705             cmdBuffer,
4706             &surfaceCodecParams,
4707             kernelState));
4708 
4709         // BRC Distortion surface - Intra or Inter
4710         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4711             &surfaceCodecParams,
4712             m_brcDistortion,
4713             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
4714             bindingTable->dwBindingTableEntries[startBTI++],
4715             0,
4716             true));
4717         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4718             m_hwInterface,
4719             cmdBuffer,
4720             &surfaceCodecParams,
4721             kernelState));
4722 
4723         // Pixel MB Statistics surface
4724         CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4725             &surfaceCodecParams,
4726             &m_resMbStatsBuffer,
4727             MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize),
4728             0,
4729             m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE].Value,
4730             bindingTable->dwBindingTableEntries[startBTI++],
4731             false));
4732         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4733             m_hwInterface,
4734             cmdBuffer,
4735             &surfaceCodecParams,
4736             kernelState));
4737     }
4738     else
4739     {
4740         // CQP ROI
4741         startBTI += 3;
4742     }
4743     // MB QP surface
4744     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4745         &surfaceCodecParams,
4746         &m_brcBuffers.sBrcMbQpBuffer,
4747         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MB_QP_CODEC].Value,
4748         bindingTable->dwBindingTableEntries[startBTI++],
4749         0,
4750         true));
4751     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4752         m_hwInterface,
4753         cmdBuffer,
4754         &surfaceCodecParams,
4755         kernelState));
4756 
4757     // ROI surface
4758     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4759         &surfaceCodecParams,
4760         &m_brcBuffers.sBrcRoiSurface,
4761         m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ROI_ENCODE].Value,
4762         bindingTable->dwBindingTableEntries[startBTI++],
4763         0,
4764         false));
4765     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4766         m_hwInterface,
4767         cmdBuffer,
4768         &surfaceCodecParams,
4769         kernelState));
4770 
4771     return eStatus;
4772 }
4773 
GetCustomDispatchPattern(PMHW_WALKER_PARAMS walkerParams,PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)4774 MOS_STATUS CodechalEncHevcStateG11::GetCustomDispatchPattern(
4775     PMHW_WALKER_PARAMS              walkerParams,
4776     PCODECHAL_WALKER_CODEC_PARAMS   walkerCodecParams)
4777 {
4778     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4779 
4780     CODECHAL_ENCODE_CHK_NULL_RETURN(walkerParams);
4781     CODECHAL_ENCODE_CHK_NULL_RETURN(walkerCodecParams);
4782 
4783     MOS_ZeroMemory(walkerParams, sizeof(*walkerParams));
4784 
4785     walkerParams->WalkerMode = (MHW_WALKER_MODE)walkerCodecParams->WalkerMode;
4786 
4787     walkerParams->dwLocalLoopExecCount = 0xFFFF;  //MAX VALUE
4788     walkerParams->dwGlobalLoopExecCount = 0xFFFF;  //MAX VALUE
4789 
4790     // the following code is copied from the kernel ULT
4791     uint32_t maxThreadWidth, maxThreadHeight;
4792     uint32_t threadSpaceWidth, threadSpaceHeight, concurGroupNum, threadScaleV;
4793 
4794     threadSpaceWidth = walkerCodecParams->dwResolutionX;
4795     threadSpaceHeight = walkerCodecParams->dwResolutionY;
4796     maxThreadWidth = threadSpaceWidth;
4797     maxThreadHeight = threadSpaceHeight;
4798     concurGroupNum = m_numberConcurrentGroup;
4799     threadScaleV = m_numberEncKernelSubThread;
4800 
4801     if (concurGroupNum > 1)
4802     {
4803         if (m_degree45Needed)
4804         {
4805             maxThreadWidth  = threadSpaceWidth;
4806             maxThreadHeight = threadSpaceWidth + (threadSpaceWidth + threadSpaceHeight + concurGroupNum - 2) / concurGroupNum;
4807         }
4808         else //for tu4 we ensure threadspace width and height is even or a multiple of 4
4809         {
4810             maxThreadWidth  = (threadSpaceWidth + 1) & 0xfffe; //ensuring width is even
4811             maxThreadHeight = ((threadSpaceWidth + 1) >> 1) + (threadSpaceWidth + 2 * (((threadSpaceHeight + 3) & 0xfffc) - 1) + (2 * concurGroupNum - 1)) / (2 * concurGroupNum);
4812         }
4813         maxThreadHeight *= threadScaleV;
4814         maxThreadHeight += 1;
4815     }
4816     else
4817     {
4818         threadSpaceHeight *= threadScaleV;
4819         maxThreadHeight *= threadScaleV;
4820     }
4821 
4822     uint32_t localLoopExecCount = m_degree45Needed ? (2 * m_numWavefrontInOneRegion + 1):m_numWavefrontInOneRegion;
4823 
4824     eStatus = InitMediaObjectWalker(maxThreadWidth,
4825             maxThreadHeight,
4826             concurGroupNum - 1,
4827             m_swScoreboardState->GetDependencyPattern(),
4828             m_numberEncKernelSubThread - 1,
4829             localLoopExecCount,
4830             *walkerParams);
4831 
4832     return eStatus;
4833 }
4834 
GenerateLcuLevelData(MOS_SURFACE & lcuLevelInputDataSurfaceParam)4835 MOS_STATUS CodechalEncHevcStateG11::GenerateLcuLevelData(MOS_SURFACE &lcuLevelInputDataSurfaceParam)
4836 {
4837     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
4838 
4839     CODECHAL_ENCODE_FUNCTION_ENTER;
4840 
4841     CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
4842 
4843     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
4844     uint32_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
4845 
4846     uint32_t shift    = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
4847     uint32_t residual = (1 << shift) - 1;
4848 
4849     uint32_t frameWidthInLcu  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
4850     uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
4851 
4852     PLCU_LEVEL_DATA* lcuInfo = (PLCU_LEVEL_DATA*)MOS_AllocMemory(sizeof(PLCU_LEVEL_DATA) * frameWidthInLcu);
4853     CODECHAL_ENCODE_CHK_NULL_RETURN(lcuInfo);
4854     for (uint32_t i = 0; i < frameWidthInLcu; i++)
4855     {
4856         lcuInfo[i] = (PLCU_LEVEL_DATA)MOS_AllocMemory(sizeof(LCU_LEVEL_DATA) * frameHeightInLcu);
4857         if (lcuInfo[i] == nullptr)
4858         {
4859             for (uint32_t j = 0; j < i; j++)
4860             {
4861                 MOS_FreeMemory(lcuInfo[j]);
4862             }
4863             MOS_FreeMemory(lcuInfo);
4864             CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
4865         }
4866         MOS_ZeroMemory(lcuInfo[i], (sizeof(LCU_LEVEL_DATA) * frameHeightInLcu));
4867     }
4868 
4869     // Tiling case
4870     if (numTileColumns > 1 || numTileRows > 1)
4871     {
4872         // This assumes that the entire Slice is contained within a Tile
4873         for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
4874         {
4875             for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
4876             {
4877                 uint32_t tileId = tileRow * numTileColumns + tileCol;
4878                 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile = m_tileParams[tileId];
4879 
4880                 uint32_t tileColumnWidth = (currentTile.TileWidthInMinCbMinus1 + 1 + residual) >> shift;
4881                 uint32_t tileRowHeight = (currentTile.TileHeightInMinCbMinus1 + 1 + residual) >> shift;
4882 
4883                 for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
4884                 {
4885                     bool lastSliceInTile = false, sliceInTile = false;
4886 
4887                     eStatus = (MOS_STATUS) IsSliceInTile(slcCount,
4888                         &currentTile,
4889                         &sliceInTile,
4890                         &lastSliceInTile);
4891                     if (eStatus != MOS_STATUS_SUCCESS)
4892                     {
4893                         for (uint32_t i = 0; i < frameWidthInLcu; i++)
4894                         {
4895                             MOS_FreeMemory(lcuInfo[i]);
4896                         }
4897                         MOS_FreeMemory(lcuInfo);
4898                         CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4899                     }
4900 
4901                     if (!sliceInTile)
4902                     {
4903                         startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4904                         continue;
4905                     }
4906 
4907                     sliceStartLcu      = m_hevcSliceParams[slcCount].slice_segment_address;
4908                     uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
4909                     uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
4910 
4911                     for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
4912                     {
4913                         lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU;
4914                         lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex     = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice);  // this should be next slice start index
4915                         lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount;
4916                         lcuInfo[sliceLcuX][sliceLcuY].TileId = (uint16_t)tileId;
4917                         lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = (uint16_t)currentTile.TileStartLCUX;
4918                         lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = (uint16_t)currentTile.TileStartLCUY;
4919                         lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)(currentTile.TileStartLCUX + tileColumnWidth);
4920                         lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)(currentTile.TileStartLCUY + tileRowHeight);
4921 
4922                         sliceLcuX++;
4923 
4924                         if (sliceLcuX >= currentTile.TileStartLCUX + tileColumnWidth)
4925                         {
4926                             sliceLcuX = currentTile.TileStartLCUX;
4927                             sliceLcuY++;
4928                         }
4929                     }
4930                     startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4931                 }
4932             }
4933         }
4934     }
4935     else // non-tiling case
4936     {
4937         for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
4938         {
4939             sliceStartLcu      = m_hevcSliceParams[slcCount].slice_segment_address;
4940             uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
4941             uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
4942 
4943             for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
4944             {
4945                 lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU;
4946                 lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex     = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice);  // this should be next slice start index
4947                 lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount;
4948                 lcuInfo[sliceLcuX][sliceLcuY].TileId = 0;
4949                 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = 0;
4950                 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = 0;
4951                 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)frameWidthInLcu;
4952                 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)frameHeightInLcu;
4953 
4954                 sliceLcuX++;
4955 
4956                 if (sliceLcuX >= frameWidthInLcu)
4957                 {
4958                     sliceLcuX = 0;
4959                     sliceLcuY++;
4960                 }
4961             }
4962             startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4963         }
4964     }
4965 
4966     // Write LCU Info to the surface
4967     if (!Mos_ResourceIsNull(&lcuLevelInputDataSurfaceParam.OsResource))
4968     {
4969         MOS_LOCK_PARAMS lockFlags;
4970         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4971         lockFlags.WriteOnly = 1;
4972         PLCU_LEVEL_DATA lcuLevelData = (PLCU_LEVEL_DATA)m_osInterface->pfnLockResource(
4973             m_osInterface,
4974             &lcuLevelInputDataSurfaceParam.OsResource,
4975             &lockFlags);
4976         if (lcuLevelData == nullptr)
4977         {
4978             for (uint32_t i = 0; i < frameWidthInLcu; i++)
4979             {
4980                 MOS_FreeMemory(lcuInfo[i]);
4981             }
4982             MOS_FreeMemory(lcuInfo);
4983             CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
4984         }
4985 
4986         uint8_t* dataRowStart = (uint8_t*)lcuLevelData;
4987 
4988         for (uint32_t sliceLcuY = 0; sliceLcuY < frameHeightInLcu; sliceLcuY++)
4989         {
4990             for (uint32_t sliceLcuX = 0; sliceLcuX < frameWidthInLcu; sliceLcuX++)
4991             {
4992                 *(lcuLevelData) = lcuInfo[sliceLcuX][sliceLcuY];
4993 
4994                 if ((sliceLcuX + 1) == frameWidthInLcu)
4995                 {
4996                     dataRowStart += lcuLevelInputDataSurfaceParam.dwPitch;
4997                     lcuLevelData = (PLCU_LEVEL_DATA)dataRowStart;
4998                 }
4999                 else
5000                 {
5001                     lcuLevelData++;
5002                 }
5003             }
5004         }
5005 
5006         m_osInterface->pfnUnlockResource(
5007             m_osInterface,
5008             &lcuLevelInputDataSurfaceParam.OsResource);
5009     }
5010     else
5011     {
5012         eStatus = MOS_STATUS_NULL_POINTER;
5013         CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
5014     }
5015 
5016     // Freeing the temporarily allocated memory
5017     if (lcuInfo)
5018     {
5019         for (uint32_t i = 0; i < frameWidthInLcu; i++)
5020         {
5021             MOS_FreeMemory(lcuInfo[i]);
5022         }
5023         MOS_FreeMemory(lcuInfo);
5024     }
5025     return eStatus;
5026 }
5027 
GenerateConcurrentThreadGroupData()5028 MOS_STATUS CodechalEncHevcStateG11::GenerateConcurrentThreadGroupData()
5029 {
5030     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5031     uint32_t    curIdx = m_currRecycledBufIdx;
5032 
5033     CODECHAL_ENCODE_FUNCTION_ENTER;
5034 
5035     if (!Mos_ResourceIsNull(&m_encBCombinedBuffer1[curIdx].sResource))
5036     {
5037         MOS_LOCK_PARAMS lockFlags;
5038         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5039         lockFlags.WriteOnly = 1;
5040         auto *buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
5041             m_osInterface,
5042             &m_encBCombinedBuffer1[curIdx].sResource,
5043             &lockFlags);
5044         CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
5045 
5046         MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
5047 
5048         auto concurrentTgData = (PCONCURRENT_THREAD_GROUP_DATA)&buf->concurrent.item[0];
5049 
5050         uint32_t shift    = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5051         uint32_t residual = (1 << shift) - 1;
5052 
5053         uint32_t frameWidthInLcu  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
5054         uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
5055 
5056         uint32_t slcCount = 0;
5057         // Currently only using one thread group for each slice. Extend it to multiple soon.
5058         for (uint32_t startLCU = 0; slcCount < m_numSlices; slcCount++, startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice)
5059         {
5060             uint32_t sliceStartLcu  = m_hevcSliceParams[slcCount].slice_segment_address;
5061             uint32_t sliceStartLcuX = sliceStartLcu % frameWidthInLcu;
5062             uint32_t sliceStartLcuY = sliceStartLcu / frameWidthInLcu;
5063 
5064             uint32_t sliceEnd     = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice);  // this should be next slice start index
5065             uint32_t sliceEndLcuX = sliceStartLcu % frameWidthInLcu;
5066             uint32_t sliceEndLcuY = sliceStartLcu / frameWidthInLcu;
5067 
5068             concurrentTgData->CurrSliceStartLcuX = (uint16_t)sliceStartLcuX;
5069             concurrentTgData->CurrSliceStartLcuY = (uint16_t)sliceStartLcuY;
5070 
5071             concurrentTgData->CurrSliceEndLcuX = (uint16_t)sliceEndLcuX;
5072             concurrentTgData->CurrSliceEndLcuY = (uint16_t)sliceEndLcuY;
5073 
5074             concurrentTgData->CurrTgStartLcuX = (uint16_t)sliceStartLcuX;
5075             concurrentTgData->CurrTgStartLcuY = (uint16_t)sliceStartLcuY;
5076 
5077             concurrentTgData->CurrTgEndLcuX = (uint16_t)sliceEndLcuX;
5078             concurrentTgData->CurrTgEndLcuY = (uint16_t)sliceEndLcuY;
5079         }
5080 
5081         m_osInterface->pfnUnlockResource(
5082             m_osInterface,
5083             &m_encBCombinedBuffer1[curIdx].sResource);
5084     }
5085     else
5086     {
5087         CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
5088         return MOS_STATUS_NULL_POINTER;
5089     }
5090 
5091     return eStatus;
5092 }
5093 
//!
//! \brief    Set up and dispatch the HEVC MBEnc kernel for the current picture
//! \details  Picks the kernel state and walker resolution from m_isMaxLcu64, prepares the
//!           state heaps, generates the LCU level data (and, for the 26-degree family of
//!           dependency patterns, the concurrent thread group data), programs the curbe and
//!           surfaces, adds the media object walker command and returns/submits the command
//!           buffer.
//! \param    [in] encFunctionType
//!           The value passed by the caller is not used: it is overwritten below from the
//!           picture coding type and m_isMaxLcu64.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if every step succeeds, otherwise whatever the
//!           CODECHAL_ENCODE_CHK_STATUS_RETURN macro returns for the first failing step
//!
MOS_STATUS CodechalEncHevcStateG11::EncodeMbEncKernel(
    CODECHAL_MEDIA_STATE_TYPE   encFunctionType)
{
    MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;

    PerfTagSetting perfTag;
    CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);

    // Initialize DSH kernel state
    PMHW_KERNEL_STATE   kernelState;
    CODECHAL_WALKER_CODEC_PARAMS    walkerCodecParams;
    CODECHAL_WALKER_DEGREE          walkerDegree;  // NOTE(review): not referenced anywhere in this function
    MHW_WALKER_PARAMS               walkerParams;
    uint32_t                        walkerResolutionX, walkerResolutionY;
    uint16_t  totalThreadNumPerLcu = 1;

    // The media state is derived here; the incoming parameter value is discarded.
    if (m_hevcPicParams->CodingType == I_TYPE)
    {
        encFunctionType = CODECHAL_MEDIA_STATE_HEVC_I_MBENC;
    }
    else
    {
        encFunctionType = m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
    }

    // Kernel state and walker resolution depend only on the LCU size, not on the coding type.
    if (m_isMaxLcu64)
    {
        kernelState = &m_mbEncKernelStates[MBENC_LCU64_KRNIDX];
        if (m_hevcSeqParams->TargetUsage == 1)
        {
            // One walker unit per 64x64 block (frame size aligned up to MAX_LCU_SIZE, then >> 6)
            walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
            walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
        }
        else
        {
            // Every other target usage doubles the resolution in both directions
            walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6);
            walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6);
        }
    }
    else
    {
        // One walker unit per 32x32 block
        kernelState       = &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
        walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
        walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
    }

    // Describe the walk, then let GetCustomDispatchPattern fill walkerParams from it
    MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
    walkerCodecParams.WalkerMode = m_walkerMode;
    walkerCodecParams.dwResolutionX = walkerResolutionX;
    walkerCodecParams.dwResolutionY = walkerResolutionY;
    walkerCodecParams.dwNumSlices = m_numSlices;
    walkerCodecParams.usTotalThreadNumPerLcu = totalThreadNumPerLcu;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCustomDispatchPattern(&walkerParams, &walkerCodecParams));

    // If Single Task Phase is not enabled, use BT count for the kernel state.
    // With single task phase, m_maxBtCount is requested once, on the first task of the phase.
    if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
    {
        uint32_t maxBtCount = m_singleTaskPhaseSupported ?
                              m_maxBtCount : kernelState->KernelParams.iBTCount;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
            m_stateHeapInterface,
            maxBtCount));
        m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
    }

    // Set up the DSH/SSH as normal
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
        m_stateHeapInterface,
        kernelState,
        false,
        0,
        false,
        m_storeData));

    // Program the interface descriptor for the selected kernel state
    MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
    MOS_ZeroMemory(&idParams, sizeof(idParams));
    idParams.pKernelState = kernelState;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
        m_stateHeapInterface,
        1,
        &idParams));

    // Generate Lcu Level Data
    CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx]));

    // Generate Concurrent Thread Group Data
    // Only the 26 / 26Z / 26X / 26XD wavefront dependency patterns need it from the driver.
    if(m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26Degree ||
        m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26ZDegree ||
        m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDegree ||
        m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDDegree)
    {
        // Generate Concurrent Thread Group Data
        CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData());
    }
    else
    {
        // For the 45D walking pattern, the kernel generates the concurrent thread group by itself. No need for the driver to generate it.
    }

    // setup curbe
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncKernel());

    // Debug-only dumps of the DSH region, the curbe and the ISH region for this kernel
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
        encFunctionType,
        MHW_DSH_TYPE,
        kernelState));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
        encFunctionType,
        kernelState));
    //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHEVCMbEncCurbeG11(
    //    m_debugInterface,
    //    encFunctionType,
    //    &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource));  //to do

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
        encFunctionType,
        MHW_ISH_TYPE,
        kernelState));
    )

    // From here on the commands are written into the command buffer obtained below
    MOS_COMMAND_BUFFER  cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

    SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
    sendKernelCmdsParams.EncFunctionType = encFunctionType;
    sendKernelCmdsParams.pKernelState = kernelState;
    // TO DO : Remove scoreboard from VFE STATE Command
    sendKernelCmdsParams.bEnableCustomScoreBoard = false;
    sendKernelCmdsParams.pCustomScoreBoard = nullptr;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));

    // Add binding table
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
        m_stateHeapInterface,
        kernelState));

    // send surfaces
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesKernel(&cmdBuffer));

    // Debug-only dump of the LCU level input surface; the file name and media state used for
    // the dump are chosen from m_pictureCodingType (I vs. non-I)
    CODECHAL_DEBUG_TOOL(
        if (m_pictureCodingType == I_TYPE)
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
                &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
                CodechalDbgAttr::attrOutput,
                "HEVC_I_MBENC_LcuLevelData_In",
                CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
        }
        else
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
                &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
                CodechalDbgAttr::attrOutput,
                "HEVC_B_MBENC_LcuLevelData_In",
                CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
        }
    )

    // Debug-only dump of the B constant table, for the B kernels only
    if ((encFunctionType == CODECHAL_MEDIA_STATE_HEVC_B_MBENC) || (encFunctionType == CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC))
    {
        CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_encConstantTableForB.sResource,
            CodechalDbgAttr::attrOutput,
            "HEVC_B_MBENC_ConstantData_In",
            m_encConstantTableForB.dwSize,
            0,
            encFunctionType)));
    }

    // Dispatch the kernel with the walker parameters produced by GetCustomDispatchPattern
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
        &cmdBuffer,
        &walkerParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));

    // Add dump for MBEnc surface state heap here
    CODECHAL_DEBUG_TOOL(
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
        encFunctionType,
        MHW_SSH_TYPE,
        kernelState));
    )

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
        m_stateHeapInterface,
        kernelState));

    // The batch buffer is terminated only when this task closes the phase
    // (always, when single task phase is not supported)
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
            m_stateHeapInterface));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
            &cmdBuffer,
            nullptr));
    }

    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
        &cmdBuffer,
        encFunctionType,
        nullptr)));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));

    m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    // Submit under the same condition as the batch buffer end above, then clear the
    // last-task flag. The result of pfnSubmitCommandBuffer is not checked here.
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
        m_lastTaskInPhase = false;
    }

    // Debug-only dumps of the four debug surfaces
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_debugSurface[0].sResource,
            CodechalDbgAttr::attrOutput,
            "DebugDataSurface_Out0",
            m_debugSurface[0].dwSize,
            0,
            encFunctionType));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_debugSurface[1].sResource,
            CodechalDbgAttr::attrOutput,
            "DebugDataSurface_Out1",
            m_debugSurface[1].dwSize,
            0,
            encFunctionType));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_debugSurface[2].sResource,
            CodechalDbgAttr::attrOutput,
            "DebugDataSurface_Out2",
            m_debugSurface[2].dwSize,
            0,
            encFunctionType));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_debugSurface[3].sResource,
            CodechalDbgAttr::attrOutput,
            "DebugDataSurface_Out3",
            m_debugSurface[3].dwSize,
            0,
            encFunctionType));
    );

    // Everything between #if 0 and #endif is compiled out; it is kept only as a reference
    // for dumps that could be taken right after ENC.
#if 0 // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them.
    {
        CODECHAL_DEBUG_TOOL(
            CODEC_REF_LIST      currRefList;

        m_currRefList = (m_refList[m_currReconstructedPic.FrameIdx]);
        m_currRefList->RefPic = m_currOriginalPic;

        m_debugInterface->m_currPic = m_currOriginalPic;
        m_debugInterface->m_bufferDumpFrameNum = m_storeData;
        m_debugInterface->m_frameType = m_pictureCodingType;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_currRefList->resRefMbCodeBuffer,
            CodechalDbgAttr::attrOutput,
            "MbCode",
            m_picWidthInMb * m_frameFieldHeightInMb * 64,
            CodecHal_PictureIsBottomField(m_currRefList->RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
            (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
            CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));

        if (m_mvDataSize)
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                &currRefList.resRefMvDataBuffer,
                CodechalDbgAttr::attrOutput,
                "MbData",
                m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
                CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
                (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
                CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
        }

        if (CodecHalIsFeiEncode(m_codecFunction))
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                &m_resDistortionBuffer,
                CodechalDbgAttr::attrOutput,
                "DistortionSurf",
                m_picWidthInMb * m_frameFieldHeightInMb * 48,
                CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
                (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
                CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
        }

        )

       CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
                this,
                &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
                m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
                (const char*)"_Hevc_CombinedBuffer2",
                false));
        );

        // Dump SW scoreboard surface - Output of MBENC
        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHevcEncodeSwScoreboardSurface(
                m_debugInterface,
                m_swScoreboardState->GetCurSwScoreboardSurface(), false));
        );

        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
                this,
                &m_encConstantTableForB.sResource,
                m_encConstantTableForB.dwSize,
                (const char*)"_Hevc_EncConstantTable",
                true));
        );

        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
                this,
                &m_debugSurface[0].sResource,
                m_debugSurface[0].dwSize,
                (const char*)"_Hevc_DebugDump0",
                true));
        );

        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
                this,
                &m_debugSurface[1].sResource,
                m_debugSurface[1].dwSize,
                (const char*)"_Hevc_DebugDump1",
                true));
        );

        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
                this,
                &m_debugSurface[2].sResource,
                m_debugSurface[2].dwSize,
                (const char*)"_Hevc_DebugDump2",
                true));
        );

        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
                this,
                &m_debugSurface[3].sResource,
                m_debugSurface[3].dwSize,
                (const char*)"_Hevc_DebugDump3",
                true));
        );

        CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
            &m_currPicWithReconBoundaryPix,
            CodechalDbgAttr::attrReconstructedSurface,
            "ReconSurf")));
    }
#endif

    return eStatus;
}
5458 
EncodeBrcInitResetKernel()5459 MOS_STATUS CodechalEncHevcStateG11::EncodeBrcInitResetKernel()
5460 {
5461     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5462 
5463     CODECHAL_ENCODE_FUNCTION_ENTER;
5464 
5465     CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
5466 
5467     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET;
5468 
5469     // Initialize DSH kernel state
5470     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
5471 
5472     // If Single Task Phase is not enabled, use BT count for the kernel state.
5473     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5474     {
5475         uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5476             m_maxBtCount : kernelState->KernelParams.iBTCount;
5477         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5478             m_stateHeapInterface,
5479             maxBtCount));
5480         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5481         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5482     }
5483 
5484     // Set up the DSH/SSH as normal
5485     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5486         m_stateHeapInterface,
5487         kernelState,
5488         false,
5489         0,
5490         false,
5491         m_storeData));
5492 
5493     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5494     MOS_ZeroMemory(&idParams, sizeof(idParams));
5495     idParams.pKernelState = kernelState;
5496     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5497         m_stateHeapInterface,
5498         1,
5499         &idParams));
5500 
5501     // Setup curbe for BrcInitReset kernel
5502     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcInitReset(
5503         brcKrnIdx));
5504 
5505     CODECHAL_MEDIA_STATE_TYPE   encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;
5506     CODECHAL_DEBUG_TOOL(
5507         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5508             encFunctionType,
5509             MHW_DSH_TYPE,
5510             kernelState));
5511     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5512         encFunctionType,
5513         kernelState));
5514     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5515         encFunctionType,
5516         MHW_ISH_TYPE,
5517         kernelState));
5518     )
5519 
5520     MOS_COMMAND_BUFFER cmdBuffer;
5521     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5522 
5523     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5524     sendKernelCmdsParams.EncFunctionType = encFunctionType;
5525     sendKernelCmdsParams.pKernelState = kernelState;
5526     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5527 
5528     // Add binding table
5529     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5530         m_stateHeapInterface,
5531         kernelState));
5532 
5533     // Send surfaces for BrcInitReset Kernel
5534     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcInitResetSurfaces(&cmdBuffer, brcKrnIdx));
5535 
5536     MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
5537     MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
5538 
5539     MediaObjectInlineData mediaObjectInlineData;
5540     MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
5541     mediaObjectParams.pInlineData = &mediaObjectInlineData;
5542     mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
5543     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
5544         &cmdBuffer,
5545         nullptr,
5546         &mediaObjectParams));
5547 
5548     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5549 
5550     // Add dump for BrcInitReset surface state heap here
5551     CODECHAL_DEBUG_TOOL(
5552         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5553             encFunctionType,
5554             MHW_SSH_TYPE,
5555             kernelState));
5556     )
5557 
5558     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5559         m_stateHeapInterface,
5560         kernelState));
5561     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5562     {
5563         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5564             m_stateHeapInterface));
5565         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5566             &cmdBuffer,
5567             nullptr));
5568     }
5569 
5570     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5571         &cmdBuffer,
5572         encFunctionType,
5573         nullptr)));
5574 
5575     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5576 
5577     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5578 
5579     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5580     {
5581         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5582         m_lastTaskInPhase = false;
5583     }
5584 
5585     return eStatus;
5586 }
5587 
EncodeBrcFrameUpdateKernel()5588 MOS_STATUS CodechalEncHevcStateG11::EncodeBrcFrameUpdateKernel()
5589 {
5590     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5591 
5592     CODECHAL_ENCODE_FUNCTION_ENTER;
5593 
5594     PerfTagSetting perfTag;
5595     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
5596 
5597     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
5598 
5599     // Initialize DSH kernel state
5600     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
5601 
5602     // If Single Task Phase is not enabled, use BT count for the kernel state.
5603     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5604     {
5605         uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5606             m_maxBtCount : kernelState->KernelParams.iBTCount;
5607         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5608             m_stateHeapInterface,
5609             maxBtCount));
5610         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5611         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5612     }
5613 
5614     // Set up the DSH/SSH as normal
5615     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5616         m_stateHeapInterface,
5617         kernelState,
5618         false,
5619         0,
5620         false,
5621         m_storeData));
5622 
5623     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5624     MOS_ZeroMemory(&idParams, sizeof(idParams));
5625     idParams.pKernelState = kernelState;
5626     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5627         m_stateHeapInterface,
5628         1,
5629         &idParams));
5630 
5631     // Setup curbe for BrcFrameUpdate kernel
5632     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
5633         brcKrnIdx));
5634 
5635     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
5636     CODECHAL_DEBUG_TOOL(
5637         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5638             encFunctionType,
5639             MHW_DSH_TYPE,
5640             kernelState));
5641     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5642         encFunctionType,
5643         kernelState));
5644     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5645         encFunctionType,
5646         MHW_ISH_TYPE,
5647         kernelState));
5648     )
5649 
5650     MOS_COMMAND_BUFFER cmdBuffer;
5651     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5652 
5653     SendKernelCmdsParams sendKernelCmdsParams;
5654     sendKernelCmdsParams = SendKernelCmdsParams();
5655     sendKernelCmdsParams.EncFunctionType = encFunctionType;
5656     sendKernelCmdsParams.pKernelState = kernelState;
5657     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5658 
5659     // Add binding table
5660     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5661         m_stateHeapInterface,
5662         kernelState));
5663 
5664     // Send surfaces for BrcFrameUpdate Kernel
5665     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcFrameUpdateSurfaces(&cmdBuffer));
5666 
5667     MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
5668     MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
5669 
5670     MediaObjectInlineData mediaObjectInlineData;
5671     MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
5672     mediaObjectParams.pInlineData = &mediaObjectInlineData;
5673     mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
5674     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
5675         &cmdBuffer,
5676         nullptr,
5677         &mediaObjectParams));
5678 
5679     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5680 
5681     // Add dump for BrcFrameUpdate surface state heap here
5682     CODECHAL_DEBUG_TOOL(
5683         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5684             encFunctionType,
5685             MHW_SSH_TYPE,
5686             kernelState));
5687     )
5688 
5689     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5690         m_stateHeapInterface,
5691         kernelState));
5692     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5693     {
5694         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5695             m_stateHeapInterface));
5696         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5697             &cmdBuffer,
5698             nullptr));
5699     }
5700 
5701     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5702         &cmdBuffer,
5703         encFunctionType,
5704         nullptr)));
5705 
5706     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5707 
5708     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5709 
5710     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5711     {
5712         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5713         m_lastTaskInPhase = false;
5714     }
5715 
5716     return eStatus;
5717 }
5718 
EncodeBrcLcuUpdateKernel()5719 MOS_STATUS CodechalEncHevcStateG11::EncodeBrcLcuUpdateKernel()
5720 {
5721     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5722 
5723     CODECHAL_ENCODE_FUNCTION_ENTER;
5724 
5725     PerfTagSetting perfTag;
5726     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);
5727 
5728     CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE;
5729 
5730     // Initialize DSH kernel state
5731     PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
5732 
5733     // If Single Task Phase is not enabled, use BT count for the kernel state.
5734     if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5735     {
5736         uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5737             m_maxBtCount : kernelState->KernelParams.iBTCount;
5738         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5739             m_stateHeapInterface,
5740             maxBtCount));
5741         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5742         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5743     }
5744 
5745     // Set up the DSH/SSH as normal
5746     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5747         m_stateHeapInterface,
5748         kernelState,
5749         false,
5750         0,
5751         false,
5752         m_storeData));
5753 
5754     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5755     MOS_ZeroMemory(&idParams, sizeof(idParams));
5756     idParams.pKernelState = kernelState;
5757     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5758         m_stateHeapInterface,
5759         1,
5760         &idParams));
5761 
5762     // Setup curbe for BrcFrameUpdate kernel
5763     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
5764         brcKrnIdx));
5765 
5766     CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_MB_BRC_UPDATE;
5767 
5768     CODECHAL_DEBUG_TOOL(
5769         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5770             encFunctionType,
5771             MHW_DSH_TYPE,
5772             kernelState));
5773     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5774         encFunctionType,
5775         kernelState));
5776     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5777         encFunctionType,
5778         MHW_ISH_TYPE,
5779         kernelState));
5780     )
5781 
5782     MOS_COMMAND_BUFFER  cmdBuffer;
5783     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5784 
5785     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5786     sendKernelCmdsParams.EncFunctionType = encFunctionType;
5787     sendKernelCmdsParams.pKernelState = kernelState;
5788     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5789 
5790     // Add binding table
5791     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5792         m_stateHeapInterface,
5793         kernelState));
5794 
5795     if (m_hevcPicParams->NumROI)
5796     {
5797         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROISurface());
5798     }
5799 
5800     // Send surfaces for BrcFrameUpdate Kernel
5801     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcLcuUpdateSurfaces(&cmdBuffer));
5802 
5803     // Program Media walker
5804     uint32_t   resolutionX, resolutionY;
5805     resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
5806     resolutionX = MOS_ROUNDUP_SHIFT(resolutionX, 4);
5807     resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight);
5808     resolutionY = MOS_ROUNDUP_SHIFT(resolutionY, 3);
5809     CODECHAL_ENCODE_ASSERTMESSAGE("LucBRC thread space = %d x %d", resolutionX, resolutionY);
5810 
5811     MHW_WALKER_PARAMS   walkerParams;
5812     MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
5813 
5814     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5815     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5816     walkerCodecParams.WalkerMode = m_walkerMode;
5817     walkerCodecParams.dwResolutionX = resolutionX;
5818     walkerCodecParams.dwResolutionY = resolutionY;
5819     walkerCodecParams.bNoDependency = true;
5820     walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
5821     walkerCodecParams.ucGroupId = m_groupId;
5822     walkerCodecParams.wPictureCodingType = m_pictureCodingType;
5823     walkerCodecParams.bUseScoreboard = false;
5824 
5825     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
5826         m_hwInterface,
5827         &walkerParams,
5828         &walkerCodecParams));
5829 
5830     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
5831         &cmdBuffer,
5832         &walkerParams));
5833 
5834     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5835 
5836     // Add dump for BrcFrameUpdate surface state heap here
5837     CODECHAL_DEBUG_TOOL(
5838         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5839             encFunctionType,
5840             MHW_SSH_TYPE,
5841             kernelState));
5842     )
5843 
5844     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5845         m_stateHeapInterface,
5846         kernelState));
5847     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5848     {
5849         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5850             m_stateHeapInterface));
5851         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5852             &cmdBuffer,
5853             nullptr));
5854     }
5855 
5856     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5857         &cmdBuffer,
5858         encFunctionType,
5859         nullptr)));
5860 
5861     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5862 
5863     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5864 
5865     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5866     {
5867         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5868         m_lastTaskInPhase = false;
5869     }
5870 
5871     return eStatus;
5872 }
5873 
EncodeKernelFunctions()5874 MOS_STATUS CodechalEncHevcStateG11::EncodeKernelFunctions()
5875 {
5876     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5877     typedef void                        (* pAppCallBack)();
5878 
5879     if (m_pakOnlyTest)
5880     {
5881         // Skip ENC when PAK only mode is enabled
5882         return eStatus;
5883     }
5884 
5885     if (m_pictureCodingType == P_TYPE)
5886     {
5887         m_lowDelay = true;
5888     }
5889 
5890     if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
5891     {
5892         m_currRefSync = &m_refSync[m_currMbCodeIdx];
5893 
5894         // Check if the signal obj has been used before
5895         if (!m_hevcSeqParams->ParallelBRC && (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed))
5896         {
5897             MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
5898             syncParams.GpuContext = m_renderContext;
5899             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
5900             syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount;
5901 
5902             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
5903             m_currRefSync->uiSemaphoreObjCount = 0;
5904             m_currRefSync->bInUsed             = false;
5905         }
5906     }
5907     else
5908     {
5909         m_currRefSync = nullptr;
5910     }
5911 
5912     //Reset to use a different performance tag ID
5913     m_osInterface->pfnResetPerfBufferID(m_osInterface);
5914 
5915     m_firstTaskInPhase = true;
5916     m_lastTaskInPhase = false;
5917 
5918     m_brcInputForEncKernelBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx];
5919 
5920     // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
5921     // BRC init is called once even for CQP mode when ROI is enabled, hence also checking for first frame flag
5922     if ((m_brcEnabled && (m_brcInit || m_brcReset)) || (m_firstFrame && m_hevcPicParams->NumROI))
5923     {
5924         m_firstTaskInPhase = m_lastTaskInPhase = true;
5925         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcInitResetKernel());
5926         m_brcInit = m_brcReset = false;
5927     }
5928 
5929     m_firstTaskInPhase = true;
5930     m_lastTaskInPhase = false;
5931 
5932     CodechalEncodeSwScoreboard::KernelParams swScoreboardKernelParames;
5933     MOS_ZeroMemory(&swScoreboardKernelParames, sizeof(swScoreboardKernelParames));
5934     // SW scoreboard Kernel Call -- to be continued - DS + HME kernel call
5935     swScoreboardKernelParames.isHevc = false; // can be set to false. Need to enabled only for an optimization which is not needed for now
5936 
5937     m_degree45Needed = true;
5938     if (m_hevcSeqParams->TargetUsage == 1)
5939     {
5940         m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU1, m_numberConcurrentGroup);
5941         // m_numberConcurrentGroup should  default to 2 here for TU1. the only other value allowed from reg key will be 1
5942         m_degree45Needed = false;
5943     }
5944 
5945     DecideConcurrentGroupAndWaveFrontNumber();
5946 
5947     DependencyPattern walkPattern;
5948     if (m_hevcSeqParams->TargetUsage == 1)
5949     {
5950         if (m_isMaxLcu64)
5951         {
5952             walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26XDegreeAlt:dependencyWavefront26XDDegree;
5953         }
5954         else
5955         {
5956             walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26Degree:dependencyWavefront26DDegree;
5957         }
5958     }
5959     else if (m_hevcSeqParams->TargetUsage == 4)
5960     {
5961         walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront45Degree:dependencyWavefront45DDegree;
5962     }
5963     else
5964     {
5965         walkPattern = dependencyWavefront45DDegree;
5966     }
5967     m_swScoreboardState->SetDependencyPattern(walkPattern);
5968 
5969     if (m_isMaxLcu64)
5970     {
5971         if (m_hevcSeqParams->TargetUsage == 1)
5972         {
5973             swScoreboardKernelParames.scoreboardWidth = (m_widthAlignedMaxLcu >> 6);
5974             swScoreboardKernelParames.scoreboardHeight = (m_heightAlignedMaxLcu >> 6) * m_numberEncKernelSubThread;
5975         }
5976         else
5977         {
5978             swScoreboardKernelParames.scoreboardWidth =  2*(m_widthAlignedMaxLcu >> 6);
5979             swScoreboardKernelParames.scoreboardHeight = 2*(m_heightAlignedMaxLcu >> 6);
5980         }
5981         swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
5982         swScoreboardKernelParames.numberOfChildThread = m_numberEncKernelSubThread - 1; // child thread number is minus one of the total sub-thread for the main thread takes one.
5983 
5984     }
5985     else
5986     {
5987         swScoreboardKernelParames.scoreboardWidth          = 4*(m_widthAlignedLcu32 >> 5);
5988         swScoreboardKernelParames.scoreboardHeight         = m_heightAlignedLcu32 >> 5;
5989         swScoreboardKernelParames.numberOfWaveFrontSplit   = m_numberConcurrentGroup;
5990         swScoreboardKernelParames.numberOfChildThread      = 0;
5991     }
5992     swScoreboardKernelParames.swScoreboardSurfaceWidth  = swScoreboardKernelParames.scoreboardWidth;
5993     swScoreboardKernelParames.swScoreboardSurfaceHeight = swScoreboardKernelParames.scoreboardHeight;
5994 
5995     m_swScoreboardState->SetCurSwScoreboardSurfaceIndex(m_currRecycledBufIdx);
5996 
5997     swScoreboardKernelParames.lcuInfoSurface = &m_lcuLevelInputDataSurface[m_currRecycledBufIdx];
5998 
5999     if(m_useSwInitScoreboard)
6000     {
6001         SetupSwScoreBoard(&swScoreboardKernelParames);
6002     }
6003     else
6004     {
6005         // Call SW scoreboard Init kernel used by MBEnc kernel
6006         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->Execute(&swScoreboardKernelParames));
6007     }
6008 
6009     // Dump SW scoreboard surface - Output of SW scoreboard Init Kernel and Input to MBENC
6010        CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6011         m_swScoreboardState->GetCurSwScoreboardSurface(),
6012         CodechalDbgAttr::attrInput,
6013         "InitSWScoreboard_In",
6014         CODECHAL_MEDIA_STATE_SW_SCOREBOARD_INIT)));
6015 
6016     // Csc, Downscaling, and/or 10-bit to 8-bit conversion
6017     CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState);
6018 
6019     CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
6020     MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
6021     cscScalingKernelParams.bLastTaskInPhaseCSC =
6022         cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
6023     cscScalingKernelParams.bLastTaskInPhase16xDS    = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
6024     cscScalingKernelParams.bLastTaskInPhase32xDS    = !(m_hmeEnabled || m_brcEnabled);
6025 
6026     CodechalEncodeCscDsG11::HevcExtKernelParams hevcExtCscParams;
6027     MOS_ZeroMemory(&hevcExtCscParams, sizeof(hevcExtCscParams));
6028 
6029     if (m_isMaxLcu64)
6030     {
6031         hevcExtCscParams.bHevcEncHistorySum             = true;
6032         hevcExtCscParams.bUseLCU32                      = false;
6033         hevcExtCscParams.presHistoryBuffer              = &m_encBCombinedBuffer2[m_lastRecycledBufIdx].sResource;
6034         hevcExtCscParams.dwSizeHistoryBuffer            = m_historyOutBufferSize;
6035         hevcExtCscParams.dwOffsetHistoryBuffer          = m_historyOutBufferOffset;
6036         hevcExtCscParams.presHistorySumBuffer           = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6037         hevcExtCscParams.dwSizeHistorySumBuffer         = sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer);
6038         hevcExtCscParams.dwOffsetHistorySumBuffer       = sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer);
6039         hevcExtCscParams.presMultiThreadTaskBuffer      = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6040         hevcExtCscParams.dwSizeMultiThreadTaskBuffer    = m_threadTaskBufferSize;
6041         hevcExtCscParams.dwOffsetMultiThreadTaskBuffer  = m_threadTaskBufferOffset;
6042         cscScalingKernelParams.hevcExtParams            = &hevcExtCscParams;
6043     }
6044     else
6045     {
6046         cscScalingKernelParams.hevcExtParams           = nullptr; // LCU32 does not require history buffers
6047     }
6048 
6049     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
6050 
6051     if (m_hmeEnabled)
6052     {
6053         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel());
6054     }
6055     else if (m_brcEnabled && m_hevcPicParams->CodingType == I_TYPE)
6056     {
6057         m_lastTaskInPhase = true;
6058 
6059         CodechalKernelIntraDist::CurbeParam curbeParam;
6060         curbeParam.downScaledWidthInMb4x = m_downscaledWidthInMb4x;
6061         curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x;
6062 
6063         CodechalKernelIntraDist::SurfaceParams surfaceParam;
6064         surfaceParam.input4xDsSurface =
6065         surfaceParam.input4xDsVmeSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
6066         surfaceParam.intraDistSurface           = m_brcDistortion;
6067         surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset;
6068         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam));
6069     }
6070 
6071     // BRC + MbEnc in second task phase
6072     m_firstTaskInPhase = true;
6073     m_lastTaskInPhase = false;
6074 
6075     // Wait for PAK if necessary
6076     CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
6077 
6078     // ROI uses the BRC LCU update kernel, even in CQP.  So we will call it
6079     if (m_hevcPicParams->NumROI && !m_brcEnabled)
6080     {
6081         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcLcuUpdateKernel());
6082         m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6083 
6084         CODECHAL_DEBUG_TOOL(
6085             if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
6086                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6087                     &m_brcBuffers.sBrcMbQpBuffer.OsResource,
6088                     CodechalDbgAttr::attrOutput,
6089                     "MbQp",
6090                     m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
6091                     m_brcBuffers.dwBrcMbQpBottomFieldOffset,
6092                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
6093             }
6094             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6095                 &m_brcDistortion->OsResource,
6096                 CodechalDbgAttr::attrInput,
6097                 "BrcDist_AfterLcuBrc",
6098                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6099                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6100                 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6101     }
6102 
6103     if (m_brcEnabled)
6104     {
6105         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcFrameUpdateKernel());
6106         CODECHAL_DEBUG_TOOL(
6107             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6108                 &m_brcDistortion->OsResource,
6109                 CodechalDbgAttr::attrInput,
6110                 "BrcDist_AfterFrameBrc",
6111                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6112                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6113                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6114             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6115                 &m_brcBuffers.resBrcHistoryBuffer,
6116                 CodechalDbgAttr::attrOutput,
6117                 "HistoryWrite",
6118                 m_brcHistoryBufferSize,
6119                 0,
6120                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6121             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6122                 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
6123                 CodechalDbgAttr::attrOutput,
6124                 "ImgStateWrite",
6125                 BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
6126                 0,
6127                 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6128         )
6129 
6130         if (m_lcuBrcEnabled || m_hevcPicParams->NumROI)
6131         {
6132             // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
6133             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcLcuUpdateKernel());
6134             m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6135         }
6136         else
6137         {
6138             m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6139         }
6140 
6141         CODECHAL_DEBUG_TOOL(
6142             if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource))
6143             {
6144                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6145                     &m_brcBuffers.sBrcMbQpBuffer.OsResource,
6146                     CodechalDbgAttr::attrOutput,
6147                     "MbQp",
6148                     m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
6149                     m_brcBuffers.dwBrcMbQpBottomFieldOffset,
6150                     CODECHAL_MEDIA_STATE_BRC_UPDATE));
6151             }
6152             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6153                 &m_brcDistortion->OsResource,
6154                 CodechalDbgAttr::attrInput,
6155                 "BrcDist_AfterLcuBrc",
6156                 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6157                 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6158                 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6159     }
6160 
6161     m_useWeightedSurfaceForL0 = false;
6162     m_useWeightedSurfaceForL1 = false;
6163 
6164     //currently only support same weightoffset for all slices, and only support Luma weighted prediction
6165     auto slicetype = m_hevcSliceParams->slice_type;
6166     if (m_weightedPredictionSupported && !m_feiEnable &&
6167         ((slicetype == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
6168             (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)))
6169     {
6170         uint32_t LumaWeightFlag[2] = {0}; //[L0, L1]
6171         CodechalEncodeWP::SliceParams sliceWPParams;
6172         memset((void *)&sliceWPParams, 0, sizeof(sliceWPParams));
6173 
6174         //populate the slice WP parameter structure
6175         sliceWPParams.luma_log2_weight_denom = m_hevcSliceParams->luma_log2_weight_denom;  // luma weidht denom
6176         for (auto i = 0; i < 2; i++)
6177         {
6178             for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
6179             {
6180                 sliceWPParams.weights[i][j][0][0] = (1 << m_hevcSliceParams->luma_log2_weight_denom) +
6181                                                     m_hevcSliceParams->delta_luma_weight[i][j];  //Luma weight
6182                 sliceWPParams.weights[i][j][0][1] = m_hevcSliceParams->luma_offset[i][j];        //Luma offset
6183 
6184                 if (m_hevcSliceParams->delta_luma_weight[i][j] || m_hevcSliceParams->luma_offset[i][j])
6185                 {
6186                     LumaWeightFlag[i] |= (1 << j);
6187                 }
6188             }
6189         }
6190 
6191         CodechalEncodeWP::KernelParams wpKernelParams;
6192         memset((void *)&wpKernelParams, 0, sizeof(wpKernelParams));
6193         wpKernelParams.useWeightedSurfaceForL0 = &m_useWeightedSurfaceForL0;
6194         wpKernelParams.useWeightedSurfaceForL1 = &m_useWeightedSurfaceForL1;
6195         wpKernelParams.slcWPParams             = &sliceWPParams;
6196 
6197         // Weighted Prediction to be applied for L0
6198         for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1); i++)
6199         {
6200             if((LumaWeightFlag[LIST_0] & (1 << i)) && (i < CODEC_MAX_FORWARD_WP_FRAME))
6201             {
6202                 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][i];
6203                 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
6204                 {
6205                     MOS_SURFACE refFrameInput;
6206                     uint8_t     frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
6207                     refFrameInput          = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
6208 
6209                     //Weighted Prediction for ith forward reference frame
6210                     wpKernelParams.useRefPicList1   = false;
6211                     wpKernelParams.wpIndex          = i;
6212                     wpKernelParams.refFrameInput    = &refFrameInput;
6213                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
6214                 }
6215             }
6216         }
6217 
6218         // Weighted Predition to be applied for L1
6219         if (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)
6220         {
6221             for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1); i++)
6222             {
6223                 if((LumaWeightFlag[LIST_1] & (1 << i)) && (i < CODEC_MAX_BACKWARD_WP_FRAME))
6224                 {
6225                     CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][i];
6226                     if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
6227                     {
6228                         MOS_SURFACE refFrameInput;
6229                         uint8_t     frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
6230                         refFrameInput          = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
6231 
6232                         //Weighted Prediction for ith backward reference frame
6233                         wpKernelParams.useRefPicList1   = true;
6234                         wpKernelParams.wpIndex          = i;
6235                         wpKernelParams.refFrameInput    = &refFrameInput;
6236                         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
6237                     }
6238                 }
6239             }
6240         }
6241     }
6242 
6243 #if (_DEBUG || _RELEASE_INTERNAL)
6244 
6245     MOS_USER_FEATURE_VALUE_WRITE_DATA   userFeatureWriteData;
6246     // Weighted prediction for L0 Reporting
6247     userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
6248     userFeatureWriteData.Value.i32Data = m_useWeightedSurfaceForL0;
6249     userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_WEIGHTED_PREDICTION_L0_IN_USE_ID;
6250     MOS_UserFeature_WriteValues_ID(NULL, &userFeatureWriteData, 1, m_osInterface->pOsContext);
6251     // Weighted prediction for L1 Reporting
6252     userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
6253     userFeatureWriteData.Value.i32Data = m_useWeightedSurfaceForL1;
6254     userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_WEIGHTED_PREDICTION_L1_IN_USE_ID;
6255     MOS_UserFeature_WriteValues_ID(NULL, &userFeatureWriteData, 1, m_osInterface->pOsContext);
6256 
6257 #endif // _DEBUG || _RELEASE_INTERNAL
6258 
6259     // Reset to use a different performance tag ID
6260     m_osInterface->pfnResetPerfBufferID(m_osInterface);
6261 
6262     m_lastTaskInPhase  = true;
6263 
6264     if (m_hevcPicParams->CodingType == I_TYPE)
6265     {
6266         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
6267     }
6268     else
6269     {
6270         CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
6271     }
6272 
6273     // Notify PAK engine once ENC is done
6274     if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
6275     {
6276         MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
6277         syncParams.GpuContext = m_renderContext;
6278         syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
6279 
6280         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
6281     }
6282 
6283     if (m_brcEnabled)
6284     {
6285         if (m_hevcSeqParams->ParallelBRC)
6286         {
6287             m_brcBuffers.uiCurrBrcPakStasIdxForRead =
6288                 (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
6289         }
6290     }
6291 
6292     CODECHAL_DEBUG_TOOL(
6293         uint8_t       index;
6294         CODEC_PICTURE refPic;
6295         if (m_useWeightedSurfaceForL0) {
6296             refPic = m_hevcSliceParams->RefPicList[LIST_0][0];
6297             index  = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
6298 
6299             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6300                 &m_refList[index]->sRefBuffer,
6301                 CodechalDbgAttr::attrReferenceSurfaces,
6302                 "WP_In_L0")));
6303 
6304             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6305                 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + 0),
6306                 CodechalDbgAttr::attrReferenceSurfaces,
6307                 "WP_Out_L0")));
6308         } if (m_useWeightedSurfaceForL1) {
6309 
6310             refPic = m_hevcSliceParams->RefPicList[LIST_1][0];
6311             index  = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
6312 
6313             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6314                 &m_refList[index]->sRefBuffer,
6315                 CodechalDbgAttr::attrReferenceSurfaces,
6316                 "WP_In_L1")));
6317 
6318             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6319                 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + 0),
6320                 CodechalDbgAttr::attrReferenceSurfaces,
6321                 "WP_Out_L1")));
6322         })
6323 
6324         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6325             &m_scratchSurface,
6326             CodechalDbgAttr::attrInput,
6327             "Scratch_Surface",
6328             CODECHAL_MEDIA_STATE_HEVC_I_MBENC)));
6329 
6330         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6331             &m_intermediateCuRecordSurfaceLcu32,
6332             CodechalDbgAttr::attrInput,
6333             "IntermediateCURecord_Surface",
6334             CODECHAL_MEDIA_STATE_HEVC_I_MBENC)));
6335     pAppCallBack pCallBack;
6336     pCallBack = (pAppCallBack) m_encodeParams.plastEncKernelSubmissionCompleteCallback;
6337     if(pCallBack != NULL)
6338         pCallBack();
6339 
6340     m_lastPictureCodingType = m_pictureCodingType;
6341     m_lastRecycledBufIdx = m_currRecycledBufIdx;
6342 
6343     return eStatus;
6344 }
6345 
InitKernelState()6346 MOS_STATUS CodechalEncHevcStateG11::InitKernelState()
6347 {
6348     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6349 
6350     CODECHAL_ENCODE_FUNCTION_ENTER;
6351 
6352     // Init kernel state
6353     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
6354     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
6355 
6356     // Create weighted prediction kernel state
6357     CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState = MOS_New(CodechalEncodeWPG11, this));
6358     m_wpState->SetKernelBase(m_kernelBase);
6359     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->InitKernelState());
6360     // create intra distortion kernel
6361     m_intraDistKernel = MOS_New(CodechalKernelIntraDist, this);
6362     CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel);
6363     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Initialize(
6364         GetCommonKernelHeaderAndSizeG11,
6365         m_kernelBase,
6366         m_kuidCommon));
6367     // Create Hme kernel
6368     m_hmeKernel = MOS_New(CodechalKernelHmeG11, this);
6369     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
6370     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
6371         GetCommonKernelHeaderAndSizeG11,
6372         m_kernelBase,
6373         m_kuidCommon));
6374 
6375     // Create SW scoreboard init kernel state
6376     CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardG11, this));
6377     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState());
6378 
6379     return eStatus;
6380 }
6381 
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6382 MOS_STATUS CodechalEncHevcStateG11::SetDmemHuCPakIntegrate(
6383     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS    dmemParams)
6384 {
6385     CODECHAL_ENCODE_FUNCTION_ENTER;
6386 
6387     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
6388 
6389     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6390     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6391     lockFlagsWriteOnly.WriteOnly = true;
6392 
6393     int32_t currentPass = GetCurrentPass();
6394     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES || !m_brcEnabled)
6395     {
6396         eStatus = MOS_STATUS_INVALID_PARAMETER;
6397         return eStatus;
6398     }
6399 
6400     HucPakStitchDmemEncG11* hucPakStitchDmem = (HucPakStitchDmemEncG11*)m_osInterface->pfnLockResource(
6401         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6402     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6403 
6404     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG11));
6405 
6406     // reset all the offsets to -1
6407     uint32_t TotalOffsetSize =  sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
6408                                 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
6409                                 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
6410                                 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
6411                                 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
6412                                 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
6413     MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
6414 
6415     uint16_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
6416     uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6417     CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
6418     CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe);  //ucNumPipe is nonzero and even number; 2 or 4
6419     uint16_t numTiles                           = numTileRows * numTileColumns;
6420     uint16_t numTilesPerPipe                    = m_numTiles / m_numPipe;
6421 
6422     hucPakStitchDmem->PicWidthInPixel          = (uint16_t)m_frameWidth;
6423     hucPakStitchDmem->PicHeightInPixel         = (uint16_t)m_frameHeight;
6424     hucPakStitchDmem->TotalNumberOfPAKs        = m_numPipe;
6425     hucPakStitchDmem->Codec                    = 1;             // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
6426     hucPakStitchDmem->MAXPass                  = m_brcEnabled ? (m_numPassesInOnePipe + 1) : 1;
6427     hucPakStitchDmem->CurrentPass              = (uint8_t)currentPass + 1;      // // Current BRC pass [1..MAXPass]
6428     hucPakStitchDmem->MinCUSize                = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6429     hucPakStitchDmem->CabacZeroWordFlag        = true; // to do: set to true later
6430     hucPakStitchDmem->bitdepth_luma            = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
6431     hucPakStitchDmem->bitdepth_chroma          = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
6432     hucPakStitchDmem->ChromaFormatIdc          = m_hevcSeqParams->chroma_format_idc;
6433     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6434     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6435     hucPakStitchDmem->OffsetInCommandBuffer   = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
6436     hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
6437 
6438     hucPakStitchDmem->StitchEnable = true;
6439     hucPakStitchDmem->StitchCommandOffset = 0;
6440     hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6441     hucPakStitchDmem->brcUnderFlowEnable      = false; //temporally disable underflow bit rate control in HUC fw since it need more tuning.
6442 
6443     PCODEC_ENCODER_SLCDATA slcData = m_slcData;
6444     CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
6445     uint32_t totalSliceHeaderSize = 0;
6446     for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
6447     {
6448         totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
6449         slcData++;
6450     }
6451     hucPakStitchDmem->SliceHeaderSizeinBits = totalSliceHeaderSize * 8;
6452     hucPakStitchDmem->currFrameBRClevel     = m_currFrameBrcLevel;
6453 
6454     //Set the kernel output offsets
6455     hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6456     hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
6457     hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
6458     hucPakStitchDmem->VDENCSTAT_offset[0]      = 0xFFFFFFFF;
6459 
6460     for (auto i = 0; i < m_numPipe; i++)
6461     {
6462         hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6463 
6464         // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6465         // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6466         hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
6467                                                          m_hevcTileStatsOffset.uiTileSizeRecord;
6468         hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1]  = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) +
6469                                                          m_hevcTileStatsOffset.uiHevcPakStatistics;
6470     }
6471 
6472     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6473 
6474     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6475     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6476     dmemParams->dwDataLength      = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG11), CODECHAL_CACHELINE_SIZE);
6477     dmemParams->dwDmemOffset      = HUC_DMEM_OFFSET_RTOS_GEMS;
6478 
6479     return eStatus;
6480 }
6481 
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6482 MOS_STATUS CodechalEncHevcStateG11::SetRegionsHuCPakIntegrate(
6483     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS  virtualAddrParams)
6484 {
6485     CODECHAL_ENCODE_FUNCTION_ENTER;
6486 
6487     MOS_STATUS                              eStatus = MOS_STATUS_SUCCESS;
6488 
6489     int32_t currentPass = GetCurrentPass();
6490     if (currentPass < 0 ||
6491         (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES  && m_brcEnabled) ||
6492         (currentPass != 0 && m_cqpEnabled))
6493     {
6494         eStatus = MOS_STATUS_INVALID_PARAMETER;
6495         return eStatus;
6496     }
6497 
6498     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6499 
6500     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6501     // Add Virtual addr
6502     virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
6503     virtualAddrParams->regionParams[0].dwOffset = 0;
6504     virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
6505     virtualAddrParams->regionParams[1].isWritable = true;
6506     virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer;                         // Region 4 - Last Tile bitstream
6507     virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer;                         // Region 5 - HuC modifies the last tile bitstream before stitch command
6508     virtualAddrParams->regionParams[5].isWritable = true;
6509     virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer;  // Region 6  History Buffer (Input/Output)
6510     virtualAddrParams->regionParams[6].isWritable = true;
6511     virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];  //&m_resHucPakStitchReadBatchBuffer;             // Region 7 - HCP PIC state command
6512     virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer;                           // Region 9  HuC outputs BRC data
6513     virtualAddrParams->regionParams[9].isWritable = true;
6514     virtualAddrParams->regionParams[8].presRegion  = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass];  // Region 8 - data buffer read by HUC for stitching cmd generation
6515     virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;  // Region 10 - SLB for stitching cmd output from Huc
6516     virtualAddrParams->regionParams[10].isWritable = true;
6517     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;          // Region 15 [In/Out] - Tile Record Buffer
6518     virtualAddrParams->regionParams[15].dwOffset   = 0;
6519 
6520     return eStatus;
6521 }
6522 
SetDmemHuCPakIntegrateCqp(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6523 MOS_STATUS CodechalEncHevcStateG11::SetDmemHuCPakIntegrateCqp(
6524     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS    dmemParams)
6525 {
6526     CODECHAL_ENCODE_FUNCTION_ENTER;
6527 
6528     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
6529 
6530     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6531     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6532     lockFlagsWriteOnly.WriteOnly = true;
6533 
6534     int32_t currentPass = GetCurrentPass();
6535     if (currentPass != 0 || (!m_cqpEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ))
6536     {
6537         eStatus = MOS_STATUS_INVALID_PARAMETER;
6538         return eStatus;
6539     }
6540 
6541     HucPakStitchDmemEncG11* hucPakStitchDmem = (HucPakStitchDmemEncG11*)m_osInterface->pfnLockResource(
6542         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6543     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6544 
6545     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG11));
6546 
6547     // reset all the offsets to -1
6548     uint32_t TotalOffsetSize =  sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
6549                                 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
6550                                 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
6551                                 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
6552                                 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
6553                                 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
6554     MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
6555 
6556     uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
6557     uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6558     CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
6559     CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe);  //ucNumPipe is nonzero and even number; 2 or 4
6560     uint16_t numTiles = numTileRows * numTileColumns;
6561     uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
6562 
6563     hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
6564     hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
6565     hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
6566     hucPakStitchDmem->Codec = 2; //HEVC DP CQP
6567     hucPakStitchDmem->MAXPass = 1;
6568     hucPakStitchDmem->CurrentPass = 1;
6569     hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6570     hucPakStitchDmem->CabacZeroWordFlag = true;
6571     hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
6572     hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
6573     hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
6574     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6575     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6576     hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
6577     hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
6578 
6579     hucPakStitchDmem->StitchEnable = true;
6580     hucPakStitchDmem->StitchCommandOffset = 0;
6581     hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6582 
6583     //Set the kernel output offsets
6584     hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6585     hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = 0xFFFFFFFF;
6586     hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
6587     hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF;
6588 
6589     for (auto i = 0; i < m_numPipe; i++)
6590     {
6591         hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6592 
6593         // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6594         // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6595         hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
6596             m_hevcTileStatsOffset.uiTileSizeRecord;
6597     }
6598 
6599     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6600 
6601     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6602     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6603     dmemParams->dwDataLength      = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG11), CODECHAL_CACHELINE_SIZE);
6604     dmemParams->dwDmemOffset      = HUC_DMEM_OFFSET_RTOS_GEMS;
6605 
6606     return eStatus;
6607 }
6608 
ConfigStitchDataBuffer()6609 MOS_STATUS CodechalEncHevcStateG11::ConfigStitchDataBuffer()
6610 {
6611     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6612     CODECHAL_ENCODE_FUNCTION_ENTER;
6613     int32_t currentPass = GetCurrentPass();
6614     if (currentPass < 0 ||
6615         (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES  && m_brcEnabled) ||
6616         (currentPass != 0 && m_cqpEnabled))
6617     {
6618         eStatus = MOS_STATUS_INVALID_PARAMETER;
6619         return eStatus;
6620     }
6621 
6622     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6623     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6624     lockFlagsWriteOnly.WriteOnly = 1;
6625 
6626     HucCommandData* hucStitchDataBuf = (HucCommandData*)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6627     CODECHAL_ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
6628 
6629     MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
6630     hucStitchDataBuf->TotalCommands = 1;
6631     hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
6632 
6633     HucInputCmdG11 hucInputCmd;
6634     MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG11));
6635 
6636     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
6637     hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
6638     hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
6639     hucInputCmd.LengthOfTable = (uint8_t)(m_numTiles);
6640     hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;;
6641 
6642     PMOS_RESOURCE  presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;
6643 
6644     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6645         m_osInterface,
6646         presSrc,
6647         false,
6648         false));
6649     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6650         m_osInterface,
6651         &m_resBitstreamBuffer,
6652         true,
6653         true));
6654 
6655     uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
6656     uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
6657     hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
6658     hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
6659 
6660     hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
6661     hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
6662 
6663     MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG11), &hucInputCmd, sizeof(HucInputCmdG11));
6664 
6665     m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
6666 
6667     return eStatus;
6668 }
6669 
SetRegionsHuCPakIntegrateCqp(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6670 MOS_STATUS CodechalEncHevcStateG11::SetRegionsHuCPakIntegrateCqp(
6671     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS  virtualAddrParams)
6672 {
6673     CODECHAL_ENCODE_FUNCTION_ENTER;
6674 
6675     MOS_STATUS                              eStatus = MOS_STATUS_SUCCESS;
6676 
6677     int32_t currentPass = GetCurrentPass();
6678     if (currentPass < 0 ||
6679         (m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ && m_brcEnabled) ||
6680         (currentPass != 0 && m_cqpEnabled))
6681     {
6682         eStatus = MOS_STATUS_INVALID_PARAMETER;
6683         return eStatus;
6684     }
6685     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6686 
6687     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6688 
6689     // Add Virtual addr
6690     virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
6691     virtualAddrParams->regionParams[0].dwOffset = 0;
6692     virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
6693     virtualAddrParams->regionParams[1].isWritable = true;
6694     virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer;                         // Region 4 - Last Tile bitstream
6695     virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer;                         // Region 5 - HuC modifies the last tile bitstream before stitch command
6696     virtualAddrParams->regionParams[5].isWritable = true;
6697     virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer;  // Region 6  History Buffer (Input/Output)
6698     virtualAddrParams->regionParams[6].isWritable = true;
6699     virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];  //&m_resHucPakStitchReadBatchBuffer;             // Region 7 - HCP PIC state command
6700 
6701     virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer;                           // Region 9  HuC outputs BRC data
6702     virtualAddrParams->regionParams[9].isWritable = true;
6703     virtualAddrParams->regionParams[8].presRegion  = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass];  // Region 8 - data buffer read by HUC for stitching cmd generation
6704     virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;  // Region 10 - SLB for stitching cmd output from Huc
6705     virtualAddrParams->regionParams[10].isWritable = true;
6706     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;          // Region 15 [In/Out] - Tile Record Buffer
6707     virtualAddrParams->regionParams[15].dwOffset   = 0;
6708 
6709     return eStatus;
6710 }
6711 
6712 #if (_DEBUG || _RELEASE_INTERNAL)
ResetImgCtrlRegInPAKStatisticsBuffer(PMOS_COMMAND_BUFFER cmdBuffer)6713 MOS_STATUS CodechalEncHevcStateG11::ResetImgCtrlRegInPAKStatisticsBuffer(
6714     PMOS_COMMAND_BUFFER                        cmdBuffer)
6715 {
6716     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6717 
6718     CODECHAL_ENCODE_FUNCTION_ENTER;
6719 
6720     MHW_MI_STORE_DATA_PARAMS storeDataParams;
6721     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6722     storeDataParams.pOsResource         = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6723     storeDataParams.dwResourceOffset    = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
6724     storeDataParams.dwValue             = 0;
6725     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
6726         cmdBuffer,
6727         &storeDataParams));
6728 
6729     return eStatus;
6730 }
6731 #endif
6732 
ReadBrcPakStatisticsForScalability(PMOS_COMMAND_BUFFER cmdBuffer)6733 MOS_STATUS CodechalEncHevcStateG11::ReadBrcPakStatisticsForScalability(
6734     PMOS_COMMAND_BUFFER                        cmdBuffer)
6735 {
6736     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6737 
6738     CODECHAL_ENCODE_FUNCTION_ENTER;
6739 
6740     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
6741 
6742     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
6743     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
6744     miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
6745     miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCount);
6746     miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6747     miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME);
6748     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
6749 
6750     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
6751     miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
6752     miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCountNoHeader);
6753     miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6754     miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER);
6755     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
6756 
6757     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
6758     miCpyMemMemParams.presSrc     = &m_resBrcDataBuffer;
6759     miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
6760     miCpyMemMemParams.presDst     = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6761     miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
6762     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
6763 
6764     uint32_t dwOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
6765         m_encodeStatusBuf.dwNumPassesOffset +   // Num passes offset
6766         sizeof(uint32_t)* 2;                               // encodeStatus is offset by 2 DWs in the resource
6767 
6768     MHW_MI_STORE_DATA_PARAMS storeDataParams;
6769     storeDataParams.pOsResource      = &m_encodeStatusBuf.resStatusBuffer;
6770     storeDataParams.dwResourceOffset = dwOffset;
6771     storeDataParams.dwValue          = (uint8_t)GetCurrentPass();
6772     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
6773 
6774     return eStatus;
6775 }
6776 
DumpHucDebugOutputBuffers()6777 MOS_STATUS CodechalEncHevcStateG11::DumpHucDebugOutputBuffers()
6778 {
6779     MOS_STATUS      eStatus = MOS_STATUS_SUCCESS;
6780 
6781     //only dump HuC in/out buffers in brc scalability case
6782     bool dumpDebugBuffers = IsLastPipe() && (m_numPipe >= 2) && m_brcEnabled;
6783     if (m_singleTaskPhaseSupported)
6784     {
6785         dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
6786     }
6787 
6788     if (dumpDebugBuffers)
6789     {
6790         CODECHAL_DEBUG_TOOL(
6791             int32_t currentPass = GetCurrentPass();
6792             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
6793                 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
6794                 sizeof(HucPakStitchDmemEncG11),
6795                 currentPass,
6796                 hucRegionDumpPakIntegrate));
6797 
6798             // Region 7 - HEVC PIC State Command
6799             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6800                 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
6801                 0,
6802                 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
6803                 7,
6804                 "_PicState",
6805                 true,
6806                 currentPass,
6807                 hucRegionDumpPakIntegrate));
6808 
6809             // Region 5 -  Last Tile PAK Bitstream Output
6810             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6811                 &m_resBitstreamBuffer,
6812                 0,
6813                 m_encodeParams.dwBitstreamSize,
6814                 5,
6815                 "_Bitstream",
6816                 false,
6817                 currentPass,
6818                 hucRegionDumpPakIntegrate));
6819 
6820             // Region 6 - BRC History buffer
6821             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6822                 &m_brcBuffers.resBrcHistoryBuffer,
6823                 0,
6824                 m_brcHistoryBufferSize,
6825                 6,
6826                 "_HistoryBuffer",
6827                 false,
6828                 currentPass,
6829                 hucRegionDumpPakIntegrate));
6830             // Region 9 - HCP BRC Data Output
6831             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6832                 &m_resBrcDataBuffer,
6833                 0,
6834                 CODECHAL_CACHELINE_SIZE,
6835                 9,
6836                 "_HcpBrcData",
6837                 false,
6838                 currentPass,
6839                 hucRegionDumpPakIntegrate));
6840             // Region 1 - Output Aggregated Frame Level Statistics
6841             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6842                 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
6843                 0,
6844                 m_hwInterface->m_pakIntAggregatedFrameStatsSize,        // program exact out size
6845                 1,
6846                 "_AggregateFrameStats",
6847                 false,
6848                 currentPass,
6849                 hucRegionDumpPakIntegrate));
6850              // Region 0 - Tile Statistics Constant Buffer
6851             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6852                 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
6853                 0,
6854                 m_hwInterface->m_pakIntTileStatsSize,
6855                 0,
6856                 "_TileBasedStats",
6857                 true,
6858                 currentPass,
6859                 hucRegionDumpPakIntegrate));
6860              // Region 15 - Tile Record Buffer
6861             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6862                 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
6863                 0,
6864                 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
6865                 15,
6866                 "_TileRecord",
6867                 false,
6868                 currentPass,
6869                 hucRegionDumpPakIntegrate));)
6870     }
6871 
6872     return eStatus;
6873 }
6874 
CodechalEncHevcStateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)6875 CodechalEncHevcStateG11::CodechalEncHevcStateG11(
6876     CodechalHwInterface* hwInterface,
6877     CodechalDebugInterface* debugInterface,
6878     PCODECHAL_STANDARD_INFO standardInfo)
6879     :CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
6880 {
6881     m_2xMeSupported         =
6882     m_useCommonKernel       = true;
6883     m_useHwScoreboard       = false;
6884 #ifndef _FULL_OPEN_SOURCE
6885     m_kernelBase            = (uint8_t*)IGCODECKRN_G11;
6886 #else
6887     m_kernelBase            = nullptr;
6888 #endif
6889     m_kuidCommon            = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
6890     m_hucPakStitchEnabled   = true;
6891     m_scalabilityState      = nullptr;
6892 
6893     MOS_ZeroMemory(&m_currPicWithReconBoundaryPix, sizeof(m_currPicWithReconBoundaryPix));
6894     MOS_ZeroMemory(&m_lcuLevelInputDataSurface, sizeof(m_lcuLevelInputDataSurface));
6895     MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu32, sizeof(m_intermediateCuRecordSurfaceLcu32));
6896     MOS_ZeroMemory(&m_scratchSurface, sizeof(m_scratchSurface));
6897     MOS_ZeroMemory(m_debugSurface, sizeof(m_debugSurface));
6898     MOS_ZeroMemory(&m_encConstantTableForB, sizeof(m_encConstantTableForB));
6899     MOS_ZeroMemory(&m_mvAndDistortionSumSurface, sizeof(m_mvAndDistortionSumSurface));
6900     MOS_ZeroMemory(m_encBCombinedBuffer1, sizeof(m_encBCombinedBuffer1));
6901     MOS_ZeroMemory(m_encBCombinedBuffer2, sizeof(m_encBCombinedBuffer2));
6902 
6903     MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
6904     MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
6905     MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
6906     MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
6907     MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
6908     MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
6909     MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
6910 
6911     MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
6912     MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
6913     MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
6914     MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
6915     MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
6916     MOS_ZeroMemory(&m_resPipeCompleteSemaMem, sizeof(m_resPipeCompleteSemaMem));
6917     MOS_ZeroMemory(m_resHucPakStitchDmemBuffer, sizeof(m_resHucPakStitchDmemBuffer));
6918     MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
6919 
6920     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
6921     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags    = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
6922     m_hwInterface->GetStateHeapSettings()->dwDshSize        = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
6923 
6924     m_kuid = IDR_CODEC_AllHEVCEnc;
6925     MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
6926         m_kernelBase,
6927         m_kuid,
6928         &m_kernelBinary,
6929         &m_combinedKernelSize);
6930     CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
6931 
6932     m_hwInterface->GetStateHeapSettings()->dwIshSize +=
6933         MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
6934 
6935     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
6936     Mos_SetVirtualEngineSupported(m_osInterface, true);
6937 }
6938 
~CodechalEncHevcStateG11()6939 CodechalEncHevcStateG11::~CodechalEncHevcStateG11()
6940 {
6941     CODECHAL_ENCODE_FUNCTION_ENTER;
6942 
6943     if (m_wpState)
6944     {
6945         MOS_Delete(m_wpState);
6946         m_wpState = nullptr;
6947     }
6948     MOS_Delete(m_intraDistKernel);
6949 
6950     if (m_swScoreboardState)
6951     {
6952         MOS_Delete(m_swScoreboardState);
6953         m_swScoreboardState = nullptr;
6954     }
6955 
6956     if (m_scalabilityState)
6957     {
6958         MOS_FreeMemAndSetNull(m_scalabilityState);
6959     }
6960     //Note: virtual engine interface destroy is done in MOS layer
6961 }
6962 
CodecHalHevcGetFileSize(char * fileName)6963 static uint32_t CodecHalHevcGetFileSize(char* fileName)
6964 {
6965     FILE*   fp = nullptr;
6966     uint32_t    fileSize = 0;
6967     MosUtilities::MosSecureFileOpen(&fp, fileName, "rb");
6968     if (fp == nullptr)
6969     {
6970         return 0;
6971     }
6972     fseek(fp, 0, SEEK_END);
6973     fileSize = ftell(fp);
6974     fseek(fp, 0, SEEK_SET);
6975     fclose(fp);
6976 
6977     return fileSize;
6978 }
6979 
LoadPakCommandAndCuRecordFromFile()6980 MOS_STATUS CodechalEncHevcStateG11::LoadPakCommandAndCuRecordFromFile()
6981 {
6982     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
6983 
6984     CODECHAL_ENCODE_FUNCTION_ENTER;
6985 
6986     char pathOfPakCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
6987     MOS_SecureStringPrint(pathOfPakCmd,
6988         sizeof(pathOfPakCmd),
6989         sizeof(pathOfPakCmd),
6990         "%s\\PAKObj.dat.%d",
6991         m_pakOnlyDataFolder,
6992         m_frameNum);
6993 
6994     char pathOfCuRecord[MOS_USER_CONTROL_MAX_DATA_SIZE];
6995     MOS_SecureStringPrint(pathOfCuRecord,
6996         sizeof(pathOfCuRecord),
6997         sizeof(pathOfCuRecord),
6998         "%s\\CURecord.dat.%d",
6999         m_pakOnlyDataFolder,
7000         m_frameNum);
7001 
7002     uint32_t sizePakObj = CodecHalHevcGetFileSize(pathOfPakCmd);
7003     if(sizePakObj == 0 || sizePakObj > m_mvOffset)
7004     {
7005         return MOS_STATUS_INVALID_FILE_SIZE;
7006     }
7007 
7008     uint32_t sizeCuRecord = CodecHalHevcGetFileSize(pathOfCuRecord);
7009     if(sizeCuRecord == 0 || sizeCuRecord > m_mbCodeSize - m_mvOffset)
7010     {
7011         return MOS_STATUS_INVALID_FILE_SIZE;
7012     }
7013 
7014     MOS_LOCK_PARAMS lockFlags;
7015     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
7016     lockFlags.WriteOnly = 1;
7017     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
7018         m_osInterface, &m_resMbCodeSurface,  &lockFlags);
7019     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
7020 
7021     FILE* pakObj = nullptr;
7022     eStatus = MosUtilities::MosSecureFileOpen(&pakObj, pathOfPakCmd, "rb");
7023     if (pakObj == nullptr)
7024     {
7025         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7026         return eStatus;
7027     }
7028 
7029     uint8_t* pakCmd = data;
7030     if(sizePakObj != fread((void*)pakCmd, 1, sizePakObj, pakObj))
7031     {
7032         fclose(pakObj);
7033         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7034         return MOS_STATUS_INVALID_FILE_SIZE;
7035     }
7036     fclose(pakObj);
7037 
7038     uint8_t*   record  = data + m_mvOffset;
7039     FILE*      fRecord = nullptr;
7040     eStatus = MosUtilities::MosSecureFileOpen(&fRecord, pathOfCuRecord, "rb");
7041     if (fRecord == nullptr)
7042     {
7043         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7044         return eStatus;
7045     }
7046 
7047     if(sizeCuRecord != fread((void*)record, 1, sizeCuRecord, fRecord))
7048     {
7049         fclose(fRecord);
7050         m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7051         return MOS_STATUS_INVALID_FILE_SIZE;
7052     }
7053     fclose(fRecord);
7054 
7055     m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7056 
7057     return eStatus;
7058 }
7059 
ResizeOnResChange()7060 void CodechalEncHevcStateG11::ResizeOnResChange()
7061 {
7062     CODECHAL_ENCODE_FUNCTION_ENTER;
7063 
7064     CodechalEncoderState::ResizeOnResChange();
7065 
7066     // need to re-allocate surfaces according to resolution
7067     m_swScoreboardState->ReleaseResources();
7068 }
7069 
ResizeBufferOffset()7070 void CodechalEncHevcStateG11::ResizeBufferOffset()
7071 {
7072     CODECHAL_ENCODE_FUNCTION_ENTER;
7073 
7074     uint32_t size = 0;
7075     uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
7076     MBENC_COMBINED_BUFFER2 fixedBuf;
7077 
7078     //Re-Calculate m_encBCombinedBuffer2 Size and Offsets
7079     m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
7080     m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
7081 
7082     size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize;
7083 
7084     m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
7085     m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
7086 }
7087 
PicCodingTypeToSliceType(uint16_t pictureCodingType)7088 uint8_t CodechalEncHevcStateG11::PicCodingTypeToSliceType(uint16_t pictureCodingType)
7089 {
7090     uint8_t sliceType = 0;
7091 
7092     switch (pictureCodingType)
7093     {
7094     case I_TYPE:
7095         sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
7096         break;
7097     case P_TYPE:
7098         sliceType = CODECHAL_ENCODE_HEVC_P_SLICE;
7099         break;
7100     case B_TYPE:
7101     case B1_TYPE:
7102     case B2_TYPE:
7103         sliceType = CODECHAL_ENCODE_HEVC_B_SLICE;
7104         break;
7105     default:
7106         CODECHAL_ENCODE_ASSERT(false);
7107     }
7108     return sliceType;
7109 }
7110 
7111 // The following code is from the kernel ULT
InitMediaObjectWalker(uint32_t threadSpaceWidth,uint32_t threadSpaceHeight,uint32_t colorCountMinusOne,DependencyPattern dependencyPattern,uint32_t childThreadNumber,uint32_t localLoopExecCount,MHW_WALKER_PARAMS & walkerParams)7112 MOS_STATUS  CodechalEncHevcStateG11::InitMediaObjectWalker(
7113     uint32_t threadSpaceWidth,
7114     uint32_t threadSpaceHeight,
7115     uint32_t colorCountMinusOne,
7116     DependencyPattern dependencyPattern,
7117     uint32_t childThreadNumber,
7118     uint32_t localLoopExecCount,
7119     MHW_WALKER_PARAMS&  walkerParams)
7120 {
7121     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
7122 
7123     walkerParams.ColorCountMinusOne     = colorCountMinusOne;
7124     walkerParams.dwGlobalLoopExecCount  = 0x3ff;
7125     walkerParams.dwLocalLoopExecCount   = 0x3ff;
7126 
7127     if (dependencyPattern == dependencyWavefrontHorizontal)
7128     {
7129         // Global
7130         walkerParams.GlobalResolution.x         = threadSpaceWidth;
7131         walkerParams.GlobalResolution.y         = threadSpaceHeight;
7132         walkerParams.GlobalStart.x              = 0;
7133         walkerParams.GlobalStart.y              = 0;
7134         walkerParams.GlobalOutlerLoopStride.x   = threadSpaceWidth;
7135         walkerParams.GlobalOutlerLoopStride.y   = 0;
7136         walkerParams.GlobalInnerLoopUnit.x      = 0;
7137         walkerParams.GlobalInnerLoopUnit.y      = threadSpaceHeight;
7138 
7139         // Local
7140         walkerParams.BlockResolution.x      = threadSpaceWidth;
7141         walkerParams.BlockResolution.y      = threadSpaceHeight;
7142         walkerParams.LocalStart.x           = 0;
7143         walkerParams.LocalStart.y           = 0;
7144         walkerParams.LocalOutLoopStride.x   = 1;
7145         walkerParams.LocalOutLoopStride.y   = 0;
7146         walkerParams.LocalInnerLoopUnit.x   = 0;
7147         walkerParams.LocalInnerLoopUnit.y   = 1;
7148 
7149         // Mid
7150         walkerParams.MiddleLoopExtraSteps = 0;
7151         walkerParams.MidLoopUnitX = 0;
7152         walkerParams.MidLoopUnitY = 0;
7153     }
7154     else
7155     if (dependencyPattern == dependencyWavefrontVertical)
7156     {
7157         // Global
7158         walkerParams.GlobalResolution.x = threadSpaceWidth;
7159         walkerParams.GlobalResolution.y = threadSpaceHeight;
7160         walkerParams.GlobalStart.x      = 0;
7161         walkerParams.GlobalStart.y      = 0;
7162         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7163         walkerParams.GlobalOutlerLoopStride.y = 0;
7164         walkerParams.GlobalInnerLoopUnit.x = 0;
7165         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7166 
7167         // Local
7168         walkerParams.BlockResolution.x = threadSpaceWidth;
7169         walkerParams.BlockResolution.y = threadSpaceHeight;
7170         walkerParams.LocalStart.x = 0;
7171         walkerParams.LocalStart.y = 0;
7172         walkerParams.LocalOutLoopStride.x = 0;
7173         walkerParams.LocalOutLoopStride.y = 1;
7174         walkerParams.LocalInnerLoopUnit.x = 1;
7175         walkerParams.LocalInnerLoopUnit.y = 0;
7176 
7177         // Mid
7178         walkerParams.MiddleLoopExtraSteps = 0;
7179         walkerParams.MidLoopUnitX = 0;
7180         walkerParams.MidLoopUnitY = 0;
7181     }
7182     else
7183     if (dependencyPattern == dependencyWavefront45Degree)
7184     {
7185         // Global
7186         walkerParams.GlobalResolution.x = threadSpaceWidth;
7187         walkerParams.GlobalResolution.y = threadSpaceHeight;
7188         walkerParams.GlobalStart.x = 0;
7189         walkerParams.GlobalStart.y = 0;
7190         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7191         walkerParams.GlobalOutlerLoopStride.y = 0;
7192         walkerParams.GlobalInnerLoopUnit.x = 0;
7193         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7194 
7195         // Local
7196         walkerParams.BlockResolution.x = threadSpaceWidth;
7197         walkerParams.BlockResolution.y = threadSpaceHeight;
7198         walkerParams.LocalStart.x = 0;
7199         walkerParams.LocalStart.y = 0;
7200         walkerParams.LocalOutLoopStride.x = 1;
7201         walkerParams.LocalOutLoopStride.y = 0;
7202         walkerParams.LocalInnerLoopUnit.x = -1;
7203         walkerParams.LocalInnerLoopUnit.y = 1;
7204 
7205         // Mid
7206         walkerParams.MiddleLoopExtraSteps = 0;
7207         walkerParams.MidLoopUnitX = 0;
7208         walkerParams.MidLoopUnitY = 0;
7209     }
7210     else
7211     if (dependencyPattern == dependencyWavefront26Degree)
7212     {
7213         // Global
7214         walkerParams.GlobalResolution.x = threadSpaceWidth;
7215         walkerParams.GlobalResolution.y = threadSpaceHeight;
7216         walkerParams.GlobalStart.x = 0;
7217         walkerParams.GlobalStart.y = 0;
7218         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7219         walkerParams.GlobalOutlerLoopStride.y = 0;
7220         walkerParams.GlobalInnerLoopUnit.x = 0;
7221         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7222 
7223         // Local
7224         walkerParams.BlockResolution.x = threadSpaceWidth;
7225         walkerParams.BlockResolution.y = threadSpaceHeight;
7226         walkerParams.LocalStart.x = 0;
7227         walkerParams.LocalStart.y = 0;
7228         walkerParams.LocalOutLoopStride.x = 1;
7229         walkerParams.LocalOutLoopStride.y = 0;
7230         walkerParams.LocalInnerLoopUnit.x = -2;
7231         walkerParams.LocalInnerLoopUnit.y = 1;
7232 
7233         // Mid
7234         walkerParams.MiddleLoopExtraSteps = 0;
7235         walkerParams.MidLoopUnitX = 0;
7236         walkerParams.MidLoopUnitY = 0;
7237     }
7238     else
7239     if ((dependencyPattern == dependencyWavefront45XDegree) ||
7240         (dependencyPattern == dependencyWavefront45XDegreeAlt))
7241     {
7242         // Global
7243         walkerParams.GlobalResolution.x = threadSpaceWidth;
7244         walkerParams.GlobalResolution.y = threadSpaceHeight;
7245         walkerParams.GlobalStart.x = 0;
7246         walkerParams.GlobalStart.y = 0;
7247         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7248         walkerParams.GlobalOutlerLoopStride.y = 0;
7249         walkerParams.GlobalInnerLoopUnit.x = 0;
7250         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7251 
7252         // Local
7253         walkerParams.BlockResolution.x = threadSpaceWidth;
7254         walkerParams.BlockResolution.y = threadSpaceHeight;
7255         walkerParams.LocalStart.x = 0;
7256         walkerParams.LocalStart.y = 0;
7257         walkerParams.LocalOutLoopStride.x = 1;
7258         walkerParams.LocalOutLoopStride.y = 0;
7259         walkerParams.LocalInnerLoopUnit.x = -1;
7260         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7261 
7262         // Mid
7263         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7264         walkerParams.MidLoopUnitX = 0;
7265         walkerParams.MidLoopUnitY = 1;
7266     }
7267     else
7268     if ((dependencyPattern == dependencyWavefront26XDegree) ||
7269         (dependencyPattern == dependencyWavefront26XDegreeAlt)) {
7270 
7271         // Global
7272         walkerParams.GlobalResolution.x = threadSpaceWidth;
7273         walkerParams.GlobalResolution.y = threadSpaceHeight;
7274         walkerParams.GlobalStart.x = 0;
7275         walkerParams.GlobalStart.y = 0;
7276         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7277         walkerParams.GlobalOutlerLoopStride.y = 0;
7278         walkerParams.GlobalInnerLoopUnit.x = 0;
7279         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7280 
7281         // Local
7282         walkerParams.BlockResolution.x = threadSpaceWidth;
7283         walkerParams.BlockResolution.y = threadSpaceHeight;
7284         walkerParams.LocalStart.x = 0;
7285         walkerParams.LocalStart.y = 0;
7286         walkerParams.LocalOutLoopStride.x = 1;
7287         walkerParams.LocalOutLoopStride.y = 0;
7288         walkerParams.LocalInnerLoopUnit.x = -2;
7289         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7290 
7291         // Mid
7292         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7293         walkerParams.MidLoopUnitX = 0;
7294         walkerParams.MidLoopUnitY = 1;
7295     }
7296     else
7297     if (dependencyPattern == dependencyWavefront45XVp9Degree)
7298     {
7299         // Global
7300         walkerParams.GlobalResolution.x = threadSpaceWidth;
7301         walkerParams.GlobalResolution.y = threadSpaceHeight;
7302         walkerParams.GlobalStart.x = 0;
7303         walkerParams.GlobalStart.y = 0;
7304         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7305         walkerParams.GlobalOutlerLoopStride.y = 0;
7306         walkerParams.GlobalInnerLoopUnit.x = 0;
7307         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7308 
7309         // Local
7310         walkerParams.BlockResolution.x = threadSpaceWidth;
7311         walkerParams.BlockResolution.y = threadSpaceHeight;
7312         walkerParams.LocalStart.x = 0;
7313         walkerParams.LocalStart.y = 0;
7314         walkerParams.LocalOutLoopStride.x = 1;
7315         walkerParams.LocalOutLoopStride.y = 0;
7316         walkerParams.LocalInnerLoopUnit.x = -1;
7317         walkerParams.LocalInnerLoopUnit.y = 4;
7318 
7319         // Mid
7320         walkerParams.MiddleLoopExtraSteps = 3;
7321         walkerParams.MidLoopUnitX = 0;
7322         walkerParams.MidLoopUnitY = 1;
7323     }
7324     else
7325     if (dependencyPattern == dependencyWavefront26ZDegree)
7326     {
7327         // Global
7328         walkerParams.GlobalResolution.x = threadSpaceWidth;
7329         walkerParams.GlobalResolution.y = threadSpaceHeight;
7330         walkerParams.GlobalStart.x = 0;
7331         walkerParams.GlobalStart.y = 0;
7332         walkerParams.GlobalOutlerLoopStride.x = 2;
7333         walkerParams.GlobalOutlerLoopStride.y = 0;
7334         walkerParams.GlobalInnerLoopUnit.x = -4;
7335         walkerParams.GlobalInnerLoopUnit.y = 2;
7336 
7337         // Local
7338         walkerParams.BlockResolution.x = 2;
7339         walkerParams.BlockResolution.y = 2;
7340         walkerParams.LocalStart.x = 0;
7341         walkerParams.LocalStart.y = 0;
7342         walkerParams.LocalOutLoopStride.x = 0;
7343         walkerParams.LocalOutLoopStride.y = 1;
7344         walkerParams.LocalInnerLoopUnit.x = 1;
7345         walkerParams.LocalInnerLoopUnit.y = 0;
7346 
7347         // Mid
7348         walkerParams.MiddleLoopExtraSteps = 0;
7349         walkerParams.MidLoopUnitX = 0;
7350         walkerParams.MidLoopUnitY = 0;
7351     }
7352     else
7353     if (dependencyPattern == dependencyWavefront26ZigDegree)
7354     {
7355         int32_t size_x = threadSpaceWidth;//(threadSpaceWidth + 1)>> 1;
7356         int32_t size_y = threadSpaceHeight;//threadSpaceHeight << 1;
7357 
7358         // Global
7359         walkerParams.GlobalResolution.x = size_x;
7360         walkerParams.GlobalResolution.y = size_y;
7361         walkerParams.GlobalStart.x = 0;
7362         walkerParams.GlobalStart.y = 0;
7363         walkerParams.GlobalOutlerLoopStride.x = size_x;
7364         walkerParams.GlobalOutlerLoopStride.y = 0;
7365         walkerParams.GlobalInnerLoopUnit.x = 0;
7366         walkerParams.GlobalInnerLoopUnit.y = size_y;
7367 
7368         // Local
7369         walkerParams.BlockResolution.x = size_x;
7370         walkerParams.BlockResolution.y = size_y;
7371         walkerParams.LocalStart.x = 0;
7372         walkerParams.LocalStart.y = 0;
7373         walkerParams.LocalOutLoopStride.x = 1;
7374         walkerParams.LocalOutLoopStride.y = 0;
7375         walkerParams.LocalInnerLoopUnit.x = -2;
7376         walkerParams.LocalInnerLoopUnit.y = 4;
7377 
7378         // Mid
7379         walkerParams.MiddleLoopExtraSteps = 3;
7380         walkerParams.MidLoopUnitX = 0;
7381         walkerParams.MidLoopUnitY = 1;
7382     }
7383     else
7384     if (dependencyPattern == dependencyWavefront45DDegree)
7385     {
7386         // Global
7387         walkerParams.GlobalResolution.x = threadSpaceWidth;
7388         walkerParams.GlobalResolution.y = threadSpaceHeight;
7389         walkerParams.GlobalStart.x = 0;
7390         walkerParams.GlobalStart.y = 0;
7391         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7392         walkerParams.GlobalOutlerLoopStride.y = 0;
7393         walkerParams.GlobalInnerLoopUnit.x = 0;
7394         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7395 
7396         // Local
7397         walkerParams.BlockResolution.x = threadSpaceWidth;
7398         walkerParams.BlockResolution.y = threadSpaceHeight;
7399         walkerParams.LocalStart.x = threadSpaceWidth;
7400         walkerParams.LocalStart.y = 0;
7401         walkerParams.LocalOutLoopStride.x = 1;
7402         walkerParams.LocalOutLoopStride.y = 0;
7403         walkerParams.LocalInnerLoopUnit.x = -1;
7404         walkerParams.LocalInnerLoopUnit.y = 1;
7405 
7406         // Mid
7407         walkerParams.MiddleLoopExtraSteps = 0;
7408         walkerParams.MidLoopUnitX = 0;
7409         walkerParams.MidLoopUnitY = 0;
7410         if (colorCountMinusOne > 0)
7411         {
7412             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7413         }
7414     }
7415     else
7416     if (dependencyPattern == dependencyWavefront26DDegree)
7417     {
7418         // Global
7419         walkerParams.GlobalResolution.x = threadSpaceWidth;
7420         walkerParams.GlobalResolution.y = threadSpaceHeight;
7421         walkerParams.GlobalStart.x = 0;
7422         walkerParams.GlobalStart.y = 0;
7423         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7424         walkerParams.GlobalOutlerLoopStride.y = 0;
7425         walkerParams.GlobalInnerLoopUnit.x = 0;
7426         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7427         // Local
7428         walkerParams.BlockResolution.x = threadSpaceWidth;
7429         walkerParams.BlockResolution.y = threadSpaceHeight;
7430         walkerParams.LocalStart.x = threadSpaceWidth;
7431         walkerParams.LocalStart.y = 0;
7432         walkerParams.LocalOutLoopStride.x = 1;
7433         walkerParams.LocalOutLoopStride.y = 0;
7434         walkerParams.LocalInnerLoopUnit.x = -2;
7435         walkerParams.LocalInnerLoopUnit.y = 1;
7436         // Mid
7437         walkerParams.MiddleLoopExtraSteps = 0;
7438         walkerParams.MidLoopUnitX = 0;
7439         walkerParams.MidLoopUnitY = 0;
7440 
7441         if (colorCountMinusOne > 0)
7442         {
7443             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7444         }
7445     }
7446     else
7447     if (dependencyPattern == dependencyWavefront45XDDegree)
7448     {
7449         // Global
7450         walkerParams.GlobalResolution.x = threadSpaceWidth;
7451         walkerParams.GlobalResolution.y = threadSpaceHeight;
7452         walkerParams.GlobalStart.x = 0;
7453         walkerParams.GlobalStart.y = 0;
7454         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7455         walkerParams.GlobalOutlerLoopStride.y = 0;
7456         walkerParams.GlobalInnerLoopUnit.x = 0;
7457         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7458 
7459         // Local
7460         walkerParams.BlockResolution.x = threadSpaceWidth;
7461         walkerParams.BlockResolution.y = threadSpaceHeight;
7462         walkerParams.LocalStart.x = threadSpaceWidth;
7463         walkerParams.LocalStart.y = 0;
7464         walkerParams.LocalOutLoopStride.x = 1;
7465         walkerParams.LocalOutLoopStride.y = 0;
7466         walkerParams.LocalInnerLoopUnit.x = -1;
7467         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7468 
7469         // Mid
7470         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7471         walkerParams.MidLoopUnitX = 0;
7472         walkerParams.MidLoopUnitY = 1;
7473         if (colorCountMinusOne > 0)
7474         {
7475             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7476         }
7477     }
7478     else
7479     if (dependencyPattern == dependencyWavefront26XDDegree)
7480     {
7481 
7482         // Global
7483         walkerParams.GlobalResolution.x = threadSpaceWidth;
7484         walkerParams.GlobalResolution.y = threadSpaceHeight;
7485         walkerParams.GlobalStart.x = 0;
7486         walkerParams.GlobalStart.y = 0;
7487         walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7488         walkerParams.GlobalOutlerLoopStride.y = 0;
7489         walkerParams.GlobalInnerLoopUnit.x = 0;
7490         walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7491         // Local
7492         walkerParams.BlockResolution.x = threadSpaceWidth;
7493         walkerParams.BlockResolution.y = threadSpaceHeight;
7494         walkerParams.LocalStart.x = threadSpaceWidth;
7495         walkerParams.LocalStart.y = 0;
7496         walkerParams.LocalOutLoopStride.x = 1;
7497         walkerParams.LocalOutLoopStride.y = 0;
7498         walkerParams.LocalInnerLoopUnit.x = -2;
7499         walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7500         // Mid
7501         walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7502         walkerParams.MidLoopUnitX = 0;
7503         walkerParams.MidLoopUnitY = 1;
7504 
7505         if (colorCountMinusOne > 0)
7506         {
7507             walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7508         }
7509     }
7510     else
7511     {
7512         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported walking pattern is observed\n");
7513         eStatus = MOS_STATUS_INVALID_PARAMETER;
7514     }
7515     return eStatus;
7516 }
7517 
IsDegree45Needed()7518 bool CodechalEncHevcStateG11::IsDegree45Needed()
7519 {
7520     if(m_numberConcurrentGroup == 1 && m_numberEncKernelSubThread == 1)
7521     {
7522         return false;
7523     }
7524     return true;
7525 }
7526 
DecideConcurrentGroupAndWaveFrontNumber()7527 void CodechalEncHevcStateG11::DecideConcurrentGroupAndWaveFrontNumber()
7528 {
7529     uint32_t          shift       = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7530     uint32_t          widthInLcu  = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1), shift);
7531     uint32_t          heightInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1), shift);
7532     DependencyPattern walkerDegree;
7533 
7534     //As per kernel ULT,for all non TU1 cases m_numberEncKernelSubThread should be set to 1
7535     // LCU32 has no multiple thread support,
7536     if (!m_isMaxLcu64 || m_hevcSeqParams->TargetUsage != 1)
7537     {
7538         m_numberEncKernelSubThread = 1;
7539     }
7540 
7541     while(heightInLcu / m_numberConcurrentGroup == 0)
7542     {
7543         m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
7544         if(m_numberConcurrentGroup == 0)
7545         {
7546             // Try out all values and now have to use the default ones.
7547             // Concurrent group and wave-front split must be enabled together
7548             m_numberConcurrentGroup = 1;
7549             break;
7550         }
7551     }
7552 
7553     if (m_numberConcurrentGroup>1)
7554     {
7555         m_numWavefrontInOneRegion = 0;
7556         while(m_numWavefrontInOneRegion == 0)
7557         {
7558             uint32_t shift = m_degree45Needed ? 0 : 1;
7559 
7560             m_numWavefrontInOneRegion =
7561                 (widthInLcu + ((heightInLcu - 1) << shift) + m_numberConcurrentGroup - 1) / m_numberConcurrentGroup;
7562 
7563             if(m_numWavefrontInOneRegion > 0 )
7564             {
7565                 // this is a valid setting and number of regisions is greater than or equal to 1
7566                 break;
7567             }
7568             m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
7569             if(m_numberConcurrentGroup ==0 )
7570             {
7571                 // Try out all values and now have to use the default ones.
7572                 m_numberConcurrentGroup = 1;
7573                 break;
7574             }
7575         }
7576     }
7577     else
7578     {
7579         m_numWavefrontInOneRegion = 0;
7580     }
7581 
7582     m_numberEncKernelSubThread = MOS_MIN(m_numberEncKernelSubThread, m_hevcThreadTaskDataNum);
7583 
7584     return;
7585 }
7586 
UserFeatureKeyReport()7587 MOS_STATUS CodechalEncHevcStateG11::UserFeatureKeyReport()
7588 {
7589     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7590 
7591     CODECHAL_ENCODE_FUNCTION_ENTER;
7592 
7593     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport());
7594 
7595     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, m_numberConcurrentGroup, m_osInterface->pOsContext);
7596     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID, m_numberEncKernelSubThread, m_osInterface->pOsContext);
7597 #if (_DEBUG || _RELEASE_INTERNAL)
7598     CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
7599     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
7600     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
7601 #endif
7602 
7603     if (m_pakOnlyTest)
7604     {
7605         CodecHalEncode_WriteStringKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID, m_pakOnlyDataFolder, strlen(m_pakOnlyDataFolder), m_osInterface->pOsContext);
7606     }
7607 
7608     return eStatus;
7609 }
7610 
SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams * params)7611 MOS_STATUS CodechalEncHevcStateG11::SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams *params)
7612 {
7613     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7614 
7615     if (Mos_ResourceIsNull(&m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource))
7616     {
7617         MOS_ZeroMemory(m_swScoreboardState->GetCurSwScoreboardSurface(), sizeof(*m_swScoreboardState->GetCurSwScoreboardSurface()));
7618 
7619         MOS_ALLOC_GFXRES_PARAMS     allocParamsForBuffer2D;
7620         MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7621         allocParamsForBuffer2D.Type     = MOS_GFXRES_2D;
7622         allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
7623         allocParamsForBuffer2D.Format   = Format_R32U;
7624         allocParamsForBuffer2D.dwWidth  = params->swScoreboardSurfaceWidth;
7625         allocParamsForBuffer2D.dwHeight = params->swScoreboardSurfaceHeight;
7626         allocParamsForBuffer2D.pBufName = "SW Scoreboard Init buffer";
7627 
7628         eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
7629             m_osInterface,
7630             &allocParamsForBuffer2D,
7631             &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
7632 
7633         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
7634             m_osInterface,
7635             m_swScoreboardState->GetCurSwScoreboardSurface()));
7636     }
7637 
7638     if(m_swScoreboard == nullptr)
7639     {
7640         m_swScoreboard = (uint8_t*)MOS_AllocAndZeroMemory(params->scoreboardWidth * sizeof(uint32_t)*params->scoreboardHeight);
7641         InitSWScoreboard(m_swScoreboard, params->scoreboardWidth, params->scoreboardHeight,
7642             m_swScoreboardState->GetDependencyPattern(),
7643             (char)(params->numberOfChildThread));
7644     }
7645 
7646     MOS_LOCK_PARAMS lockFlags;
7647 
7648     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
7649     lockFlags.WriteOnly = 1;
7650     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
7651         m_osInterface,
7652         &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource,
7653         &lockFlags);
7654     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
7655 
7656     for(uint32_t h = 0; h < params->scoreboardHeight; h++)
7657     {
7658         uint32_t s = params->scoreboardWidth * sizeof(uint32_t);
7659         MOS_SecureMemcpy(data, s, &m_swScoreboard[h*s], s);
7660         data += m_swScoreboardState->GetCurSwScoreboardSurface()->dwPitch;
7661     }
7662 
7663     m_osInterface->pfnUnlockResource(
7664         m_osInterface,
7665         &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
7666 
7667     return eStatus;
7668 }
7669 
SetDependency(uint8_t & numDependencies,char * scoreboardDeltaX,char * scoreboardDeltaY,uint32_t dependencyPattern,char childThreadNumber)7670 void CodechalEncHevcStateG11::SetDependency(
7671     uint8_t &numDependencies,
7672     char* scoreboardDeltaX,
7673     char* scoreboardDeltaY,
7674     uint32_t dependencyPattern,
7675     char childThreadNumber)
7676 {
7677     if (dependencyPattern == dependencyWavefrontHorizontal)
7678     {
7679         numDependencies = m_numDependencyHorizontal;
7680         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyHorizontal, m_dxWavefrontHorizontal, m_numDependencyHorizontal);
7681         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyHorizontal, m_dyWavefrontHorizontal, m_numDependencyHorizontal);
7682     }
7683     else if (dependencyPattern == dependencyWavefrontVertical)
7684     {
7685         numDependencies = m_numDependencyVertical;
7686         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyVertical, m_dxWavefrontVertical, m_numDependencyVertical);
7687         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyVertical, m_dyWavefrontVertical, m_numDependencyVertical);
7688     }
7689     else if (dependencyPattern == dependencyWavefront45Degree)
7690     {
7691         numDependencies = m_numDependency45Degree;
7692         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
7693         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
7694     }
7695     else if (dependencyPattern == dependencyWavefront26Degree ||
7696              dependencyPattern == dependencyWavefront26DDegree)
7697     {
7698         numDependencies = m_numDependency26Degree;
7699         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26Degree, m_dxWavefront26Degree, m_numDependency26Degree);
7700         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26Degree, m_dyWavefront26Degree, m_numDependency26Degree);
7701     }
7702     else if (dependencyPattern == dependencyWavefront45XDegree)
7703     {
7704         numDependencies = m_numDependency45xDegree;
7705         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegree, m_dxWavefront45xDegree, m_numDependency45xDegree);
7706         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegree, m_dyWavefront45xDegree, m_numDependency45xDegree);
7707         numDependencies = childThreadNumber + 2;
7708         scoreboardDeltaY[0] = childThreadNumber;
7709     }
7710     else if (dependencyPattern == dependencyWavefront26XDegree)
7711     {
7712         numDependencies = m_numDependency26xDegree;
7713         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegree, m_dxWavefront26xDegree, m_numDependency26xDegree);
7714         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegree, m_dyWavefront26xDegree, m_numDependency26xDegree);
7715         numDependencies = childThreadNumber + 3;
7716         scoreboardDeltaY[0] = childThreadNumber;
7717     }
7718     else if ((dependencyPattern == dependencyWavefront45XDegreeAlt) ||
7719         (dependencyPattern == dependencyWavefront45XDDegree))
7720     {
7721         numDependencies = m_numDependency45xDegreeAlt;
7722         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegreeAlt, m_dxWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
7723         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegreeAlt, m_dyWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
7724         scoreboardDeltaY[0] = childThreadNumber;
7725     }
7726     else if ((dependencyPattern == dependencyWavefront26XDegreeAlt) ||
7727         (dependencyPattern == dependencyWavefront26XDDegree))
7728     {
7729         numDependencies = m_numDependency26xDegreeAlt;
7730         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegreeAlt, m_dxWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
7731         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegreeAlt, m_dyWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
7732         scoreboardDeltaY[0] = childThreadNumber;
7733     }
7734     else if (dependencyPattern == dependencyWavefront45XVp9Degree)
7735     {
7736         numDependencies = m_numDependency45xVp9Degree;
7737         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xVp9Degree, m_dxWavefront45xVp9Degree, m_numDependency45xVp9Degree);
7738         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xVp9Degree, m_dyWavefront45xVp9Degree, m_numDependency45xVp9Degree);
7739     }
7740     else if (dependencyPattern == dependencyWavefront26ZDegree)
7741     {
7742         numDependencies = m_numDependency26zDegree;
7743         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26zDegree, m_dxWavefront26zDegree, m_numDependency26zDegree);
7744         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26zDegree, m_dyWavefront26zDegree, m_numDependency26zDegree);
7745     }
7746     else if (dependencyPattern == dependencyWavefront26ZigDegree)
7747     {
7748         numDependencies = m_numDependency26ZigDegree;
7749         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26ZigDegree, m_dxWavefront26ZigDegree, m_numDependency26ZigDegree);
7750         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26ZigDegree, m_dyWavefront26ZigDegree, m_numDependency26ZigDegree);
7751     }
7752     else if (dependencyPattern == dependencyWavefront45DDegree)
7753     {
7754         numDependencies = m_numDependency45Degree;
7755         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
7756         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
7757     }
7758     else
7759     {
7760         numDependencies = m_numDependencyNone;
7761         MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyNone, m_dxWavefrontNone, m_numDependencyNone);
7762         MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyNone, m_dyWavefrontNone, m_numDependencyNone);
7763     }
7764 }
7765 
7766 // ========================================================================================
7767 // FUNCTION:        InitSWScoreboard
7768 // DESCRIPTION:        Initialize software scoreboard for a specific dependency pattern.
7769 // INPUTS:            scoreboardWidth - Width of scoreboard in Entries
7770 //                    scoreboardHeight - Height of scoreboard in Entries
7771 //                    dependencyPattern - The Enumeration of the Dependency Pattern
7772 // OUTPUTS:            scoreboard - Pointer to scoreboard in Memory
7773 // ========================================================================================
InitSWScoreboard(uint8_t * scoreboard,uint32_t scoreboardWidth,uint32_t scoreboardHeight,uint32_t dependencyPattern,char childThreadNumber)7774 void CodechalEncHevcStateG11::InitSWScoreboard(uint8_t* scoreboard, uint32_t scoreboardWidth, uint32_t scoreboardHeight, uint32_t dependencyPattern, char childThreadNumber)
7775 {
7776     // 1. Select Dependency Pattern
7777     uint8_t numDependencies;
7778     char scoreboardDeltaX[m_maxNumDependency];
7779     char scoreboardDeltaY[m_maxNumDependency];
7780     memset(scoreboardDeltaX, 0, sizeof(scoreboardDeltaX));
7781     memset(scoreboardDeltaY, 0, sizeof(scoreboardDeltaY));
7782 
7783     SetDependency(numDependencies, scoreboardDeltaX, scoreboardDeltaY, dependencyPattern, childThreadNumber);
7784 
7785     // 2. Initialize scoreboard (CPU Based)
7786     int32_t dependentLocationX = 0;
7787     int32_t dependentLocationY = 0;
7788     uint32_t* scoreboardInDws = (uint32_t*)scoreboard;
7789     int32_t totalThreadNumber = childThreadNumber + 1;
7790     for (int32_t y = 0; y < (int32_t)scoreboardHeight; y += totalThreadNumber)
7791     {
7792         for (int32_t x = 0; x < (int32_t)scoreboardWidth; x++)
7793         {
7794             scoreboardInDws[y*scoreboardWidth + x] = 0;
7795 
7796             // Add dependencies accordingly
7797             for (int32_t i = 0; i < numDependencies; i++)
7798             {
7799                 dependentLocationX = x + scoreboardDeltaX[i];
7800                 dependentLocationY = y + scoreboardDeltaY[i];
7801                 if ((dependentLocationX < 0) || (dependentLocationY < 0) ||
7802                     (dependentLocationX >= (int32_t)scoreboardWidth) ||
7803                     (dependentLocationY >= (int32_t)scoreboardHeight))
7804                 {
7805                     // Do not add dependency because thread does not exist
7806                 }
7807                 else
7808                 {
7809                     scoreboardInDws[y*scoreboardWidth + x] |= (1 << i);
7810                 }
7811             } // End NumDep
7812         } // End x
7813 
7814         for (int32_t n = y + 1; n<y + totalThreadNumber; n++)
7815         {
7816             for (int32_t k = 0; k < (int32_t)scoreboardWidth; k++)
7817             {
7818                 scoreboardInDws[n*scoreboardWidth + k] = scoreboardInDws[y*scoreboardWidth + k];
7819             }
7820         }
7821 
7822     } // End y
7823 }
7824 
CreateMhwParams()7825 void CodechalEncHevcStateG11::CreateMhwParams()
7826 {
7827     m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G11);
7828     m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11);
7829     m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G11);
7830 }
7831 
CalculatePictureStateCommandSize()7832 MOS_STATUS CodechalEncHevcStateG11::CalculatePictureStateCommandSize()
7833 {
7834     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7835 
7836     CODECHAL_ENCODE_FUNCTION_ENTER;
7837 
7838     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
7839     CODECHAL_ENCODE_CHK_STATUS_RETURN(
7840         m_hwInterface->GetHxxStateCommandSize(
7841             CODECHAL_ENCODE_MODE_HEVC,
7842             &m_defaultPictureStatesSize,
7843             &m_defaultPicturePatchListSize,
7844             &stateCmdSizeParams));
7845 
7846     return eStatus;
7847 }
7848 
AddHcpPipeBufAddrCmd(PMOS_COMMAND_BUFFER cmdBuffer)7849 MOS_STATUS CodechalEncHevcStateG11::AddHcpPipeBufAddrCmd(
7850     PMOS_COMMAND_BUFFER  cmdBuffer)
7851 {
7852     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7853 
7854     CODECHAL_ENCODE_FUNCTION_ENTER;
7855 
7856     *m_pipeBufAddrParams = {};
7857     SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
7858 #ifdef _MMC_SUPPORTED
7859     m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
7860 #endif
7861     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams));
7862 
7863     return eStatus;
7864 }
7865 
SetGpuCtxCreatOption()7866 MOS_STATUS CodechalEncHevcStateG11::SetGpuCtxCreatOption()
7867 {
7868     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7869 
7870     CODECHAL_ENCODE_FUNCTION_ENTER;
7871 
7872     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
7873     {
7874         CodechalEncoderState::SetGpuCtxCreatOption();
7875     }
7876     else
7877     {
7878         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
7879         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
7880 
7881         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
7882             m_scalabilityState,
7883             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
7884     }
7885 
7886     return eStatus;
7887 }
7888 
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 * tileCodingParams,uint32_t bitstreamBufSize)7889 MOS_STATUS CodechalEncHevcStateG11::SetTileData(
7890     MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11*   tileCodingParams,
7891     uint32_t                                bitstreamBufSize)
7892 {
7893     CODECHAL_ENCODE_FUNCTION_ENTER;
7894 
7895     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
7896 
7897     if (!m_hevcPicParams->tiles_enabled_flag)
7898     {
7899         return eStatus;
7900     }
7901 
7902     uint32_t colBd[100] = { 0 };
7903     uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7904     for (uint32_t i = 0; i < num_tile_columns; i++)
7905     {
7906         colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
7907     }
7908 
7909     uint32_t rowBd[100] = { 0 };
7910     uint32_t num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7911     for (uint32_t i = 0; i < num_tile_rows; i++)
7912     {
7913         rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
7914     }
7915 
7916     m_numTiles = num_tile_rows * num_tile_columns;
7917 
7918     uint32_t const uiNumCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
7919     uint32_t       numCuRecord        = uiNumCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
7920     uint32_t    bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
7921     int32_t     frameWidthInMinCb  = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
7922     int32_t     frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
7923     int32_t     shift              = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7924     uint32_t    NumLCUInPic        = 0;
7925 
7926     for (uint32_t i = 0; i < num_tile_rows; i++)
7927     {
7928         for (uint32_t j = 0; j < num_tile_columns; j++)
7929         {
7930             NumLCUInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7931         }
7932     }
7933 
7934     uint32_t  numSliceInTile = 0;
7935     for (uint32_t uiNumLCUsInTiles = 0, i = 0; i < num_tile_rows; i++)
7936     {
7937         for (uint32_t j = 0; j < num_tile_columns; j++)
7938         {
7939             uint32_t idx = i * num_tile_columns + j;
7940             uint32_t numLCUInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7941 
7942             tileCodingParams[idx].TileStartLCUX = colBd[j];
7943             tileCodingParams[idx].TileStartLCUY = rowBd[i];
7944 
7945             tileCodingParams[idx].TileColumnStoreSelect = j % 2;
7946             tileCodingParams[idx].TileRowStoreSelect = i % 2;
7947 
7948             if (j != num_tile_columns - 1)
7949             {
7950                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
7951                 tileCodingParams[idx].IsLastTileofRow = false;
7952             }
7953             else
7954             {
7955                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
7956                 tileCodingParams[idx].IsLastTileofRow = true;
7957 
7958             }
7959 
7960             if (i != num_tile_rows - 1)
7961             {
7962                 tileCodingParams[idx].IsLastTileofColumn = false;
7963                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
7964             }
7965             else
7966             {
7967                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
7968                 tileCodingParams[idx].IsLastTileofColumn = true;
7969             }
7970 
7971             tileCodingParams[idx].NumOfTilesInFrame       = m_numTiles;
7972             tileCodingParams[idx].NumOfTileColumnsInFrame = num_tile_columns;
7973             tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * uiNumLCUsInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
7974                 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7975             tileCodingParams[idx].NumberOfActiveBePipes   = (m_numPipe > 1) ? m_numPipe : 1;
7976 
7977             tileCodingParams[idx].PakTileStatisticsOffset = m_sizeOfHcpPakFrameStats * idx / CODECHAL_CACHELINE_SIZE;
7978             tileCodingParams[idx].TileSizeStreamoutOffset = idx;
7979             tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
7980             tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource;
7981             tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
7982             tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
7983             tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
7984             tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
7985             tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
7986 
7987             cuLevelStreamoutOffset += MOS_ALIGN_CEIL((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16,  CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7988             sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
7989             saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
7990             uint64_t totalSizeTemp = (uint64_t)bitstreamBufSize * (uint64_t)numLCUInTile;
7991             uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)NumLCUInPic) + ((totalSizeTemp % (uint64_t)NumLCUInPic) ? 1 : 0);
7992             bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7993             uiNumLCUsInTiles += numLCUInTile;
7994 
7995             for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
7996             {
7997                 bool lastSliceInTile = false, sliceInTile = false;
7998                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
7999                     &tileCodingParams[idx],
8000                     &sliceInTile,
8001                     &lastSliceInTile));
8002                 numSliceInTile += (sliceInTile ? 1 : 0);
8003             }
8004         }
8005         // same row store buffer for different tile rows.
8006         saoRowstoreOffset = 0;
8007         sseRowstoreOffset = 0;
8008     }
8009 
8010     return eStatus;
8011 }
8012 
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile,bool * sliceInTile,bool * lastSliceInTile)8013 MOS_STATUS CodechalEncHevcStateG11::IsSliceInTile(
8014     uint32_t                                sliceNumber,
8015     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11   currentTile,
8016     bool                                    *sliceInTile,
8017     bool                                    *lastSliceInTile)
8018 {
8019     CODECHAL_ENCODE_FUNCTION_ENTER;
8020 
8021     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
8022 
8023     CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
8024     CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
8025     CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
8026 
8027     uint32_t shift            = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
8028     uint32_t residual = (1 << shift) - 1;
8029     uint32_t frameWidthInLCU  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
8030     uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
8031 
8032     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
8033     uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
8034     uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
8035     uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
8036 
8037     uint32_t tile_column_width = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
8038     uint32_t tile_row_height = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
8039     if (sliceLCUx <  currentTile->TileStartLCUX ||
8040         sliceLCUy <  currentTile->TileStartLCUY ||
8041         sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
8042         sliceLCUy >= currentTile->TileStartLCUY + tile_row_height
8043         )
8044     {
8045         // slice start is not in the tile boundary
8046         *lastSliceInTile = *sliceInTile = false;
8047         return eStatus;
8048     }
8049 
8050     sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tile_column_width;
8051     sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tile_column_width;
8052 
8053     if (sliceLCUx >= currentTile->TileStartLCUX + tile_column_width)
8054     {
8055         sliceLCUx -= tile_column_width;
8056         sliceLCUy++;
8057     }
8058 
8059     if (sliceLCUx <  currentTile->TileStartLCUX ||
8060         sliceLCUy <  currentTile->TileStartLCUY ||
8061         sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
8062         sliceLCUy >= currentTile->TileStartLCUY + tile_row_height
8063         )
8064     {
8065         // last LCU of the slice is out of the tile boundary
8066         *lastSliceInTile = *sliceInTile = false;
8067         return eStatus;
8068     }
8069 
8070     *sliceInTile = true;
8071 
8072     sliceLCUx++;
8073     sliceLCUy++;
8074 
8075     // the end of slice is at the boundary of tile
8076     *lastSliceInTile = (
8077         sliceLCUx == currentTile->TileStartLCUX + tile_column_width &&
8078         sliceLCUy == currentTile->TileStartLCUY + tile_row_height);
8079 
8080     return eStatus;
8081 }
8082 
8083 #if USE_CODECHAL_DEBUG_TOOL
8084 
8085 //MOS_STATUS CodechalEncHevcStateG11::CodecHal_DbgDumpHEVCMbEncCurbeG11(
8086 //    CodechalDebugInterface         *pDebugInterface,
8087 //    CODECHAL_MEDIA_STATE_TYPE       Function,
8088 //   PMOS_RESOURCE                   presDBuffer)
8089 //{
8090 
8091 //#define WRITE_CURBE_FIELD_TO_FILE(field) {\
8092 //    oss << "field = " << +pCurbeData->field << std::endl;}
8093 //
8094 //    PMOS_INTERFACE              m_osInterface = nullptr;
8095 //    MOS_LOCK_PARAMS             LockFlags;
8096 //    CodechalEncHevcStateG11::MBENC_COMBINED_BUFFER1 *pEncComBuf1 = nullptr;
8097 //
8098 //    CODECHAL_DEBUG_FUNCTION_ENTER;
8099 //
8100 //    CODECHAL_DEBUG_CHK_NULL(pDebugInterface);
8101 //    CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pOsInterface);
8102 //    CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pHwInterface);
8103 //    m_osInterface = pDebugInterface->pOsInterface;
8104 //
8105 //    if (!pDebugInterface->DumpIsEnabled(CodechalDbgAttr::attrCurbe))
8106 //    {
8107 //        return MOS_STATUS_SUCCESS;
8108 //    }
8109 //
8110 //    MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
8111 //    LockFlags.ReadOnly = 1;
8112 //
8113 //    pEncComBuf1 = (CodechalEncHevcStateG11::MBENC_COMBINED_BUFFER1*)m_osInterface->pfnLockResource(
8114 //        m_osInterface,
8115 //        presDBuffer,
8116 //       &LockFlags);
8117 //
8118 //       CodechalEncHevcStateG11::MBENC_CURBE* pCurbeData = &pEncComBuf1->Curbe;
8119 //
8120 //       std::ostringstream oss;
8121 //        oss.setf(std::ios::showbase | std::ios::uppercase);
8122 //
8123 //        oss << "# CURBE Parameters:" << std::endl;
8124 //
8125 //        WRITE_CURBE_FIELD_TO_FILE(FrameWidthInSamples);
8126 //        WRITE_CURBE_FIELD_TO_FILE(FrameHeightInSamples);
8127 //
8128 //        WRITE_CURBE_FIELD_TO_FILE(Log2MaxCUSize);
8129 //        WRITE_CURBE_FIELD_TO_FILE(Log2MinCUSize);
8130 //        WRITE_CURBE_FIELD_TO_FILE(Log2MaxTUSize);
8131 //        WRITE_CURBE_FIELD_TO_FILE(Log2MinTUSize);
8132 //        WRITE_CURBE_FIELD_TO_FILE(MaxIntraRdeIter);
8133 //        WRITE_CURBE_FIELD_TO_FILE(QPType);
8134 //        WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthInter);
8135 //        WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthIntra);
8136 //        WRITE_CURBE_FIELD_TO_FILE(Log2ParallelMergeLevel);
8137 //
8138 //        WRITE_CURBE_FIELD_TO_FILE(CornerNeighborPixel);
8139 //        WRITE_CURBE_FIELD_TO_FILE(IntraNeighborAvailFlags);
8140 //        WRITE_CURBE_FIELD_TO_FILE(ChromaFormatType);
8141 //        WRITE_CURBE_FIELD_TO_FILE(SubPelMode);
8142 //        WRITE_CURBE_FIELD_TO_FILE(InterSADMeasure);
8143 //        WRITE_CURBE_FIELD_TO_FILE(IntraSADMeasure);
8144 //        WRITE_CURBE_FIELD_TO_FILE(IntraPrediction);
8145 //        WRITE_CURBE_FIELD_TO_FILE(RefIDCostMode);
8146 //        WRITE_CURBE_FIELD_TO_FILE(TUBasedCostSetting);
8147 //
8148 //        WRITE_CURBE_FIELD_TO_FILE(ExplictModeEn);
8149 //        WRITE_CURBE_FIELD_TO_FILE(AdaptiveEn);
8150 //        WRITE_CURBE_FIELD_TO_FILE(EarlyImeSuccessEn);
8151 //        WRITE_CURBE_FIELD_TO_FILE(IntraSpeedMode);
8152 //        WRITE_CURBE_FIELD_TO_FILE(IMECostCentersSel);
8153 //        WRITE_CURBE_FIELD_TO_FILE(RDEQuantRoundValue);
8154 //        WRITE_CURBE_FIELD_TO_FILE(IMERefWindowSize);
8155 //        WRITE_CURBE_FIELD_TO_FILE(IntraComputeType);
8156 //        WRITE_CURBE_FIELD_TO_FILE(Depth0IntraPredition);
8157 //        WRITE_CURBE_FIELD_TO_FILE(TUDepthControl);
8158 //        WRITE_CURBE_FIELD_TO_FILE(IntraTuRecFeedbackDisable);
8159 //        WRITE_CURBE_FIELD_TO_FILE(MergeListBiDisable);
8160 //        WRITE_CURBE_FIELD_TO_FILE(EarlyImeStop);
8161 //
8162 //        WRITE_CURBE_FIELD_TO_FILE(FrameQP);
8163 //        WRITE_CURBE_FIELD_TO_FILE(FrameQPSign);
8164 //        WRITE_CURBE_FIELD_TO_FILE(ConcurrentGroupNum);
8165 //        WRITE_CURBE_FIELD_TO_FILE(NumofUnitInWaveFront);
8166 //
8167 //        WRITE_CURBE_FIELD_TO_FILE(LoadBalenceEnable);
8168 //        WRITE_CURBE_FIELD_TO_FILE(NumberofMultiFrame);
8169 //        WRITE_CURBE_FIELD_TO_FILE(Degree45);
8170 //        WRITE_CURBE_FIELD_TO_FILE(Break12Dependency);
8171 //        WRITE_CURBE_FIELD_TO_FILE(ThreadNumber);
8172 //
8173 //        WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_B);
8174 //        WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_P);
8175 //        WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_I);
8176 //
8177 //        WRITE_CURBE_FIELD_TO_FILE(NumofRowTile);
8178 //        WRITE_CURBE_FIELD_TO_FILE(NumofColumnTile);
8179 //
8180 //        WRITE_CURBE_FIELD_TO_FILE(TransquantBypassEnableFlag);
8181 //        WRITE_CURBE_FIELD_TO_FILE(PCMEnabledFlag);
8182 //        WRITE_CURBE_FIELD_TO_FILE(CuQpDeltaEnabledFlag);
8183 //        WRITE_CURBE_FIELD_TO_FILE(Stepping);
8184 //        WRITE_CURBE_FIELD_TO_FILE(WaveFrontSplitsEnable);
8185 //        WRITE_CURBE_FIELD_TO_FILE(HMEFlag);
8186 //        WRITE_CURBE_FIELD_TO_FILE(SuperHME);
8187 //        WRITE_CURBE_FIELD_TO_FILE(UltraHME);
8188 //        WRITE_CURBE_FIELD_TO_FILE(Cu64SkipCheckOnly);
8189 //        WRITE_CURBE_FIELD_TO_FILE(EnableCu64Check);
8190 //        WRITE_CURBE_FIELD_TO_FILE(Cu642Nx2NCheckOnly);
8191 //        WRITE_CURBE_FIELD_TO_FILE(EnableCu64AmpCheck);
8192 //        WRITE_CURBE_FIELD_TO_FILE(DisablePIntra);
8193 //        WRITE_CURBE_FIELD_TO_FILE(DisableIntraTURec);
8194 //        WRITE_CURBE_FIELD_TO_FILE(InheritIntraModeFromTU0);
8195 //        WRITE_CURBE_FIELD_TO_FILE(CostScalingForRA);
8196 //        WRITE_CURBE_FIELD_TO_FILE(DisableIntraNxN);
8197 //
8198 //        WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL0);
8199 //        WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL1);
8200 //        WRITE_CURBE_FIELD_TO_FILE(MaxBRefIdxL0);
8201 //
8202 //        WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermination);
8203 //        WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermSize);
8204 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Enable);
8205 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Order);
8206 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Th);
8207 //        WRITE_CURBE_FIELD_TO_FILE(DynamicOrderTh);
8208 //        WRITE_CURBE_FIELD_TO_FILE(PerBFrameQPOffset);
8209 //        WRITE_CURBE_FIELD_TO_FILE(IncreaseExitThresh);
8210 //        WRITE_CURBE_FIELD_TO_FILE(Dynamic64Min32);
8211 //        WRITE_CURBE_FIELD_TO_FILE(LastFrameIsIntra);
8212 //
8213 //        WRITE_CURBE_FIELD_TO_FILE(LenSP);
8214 //        WRITE_CURBE_FIELD_TO_FILE(MaxNumSU);
8215 //
8216 //        WRITE_CURBE_FIELD_TO_FILE(CostTableIndex);
8217 //
8218 //        WRITE_CURBE_FIELD_TO_FILE(SliceType);
8219 //        WRITE_CURBE_FIELD_TO_FILE(TemporalMvpEnableFlag);
8220 //        WRITE_CURBE_FIELD_TO_FILE(CollocatedFromL0Flag);
8221 //        WRITE_CURBE_FIELD_TO_FILE(theSameRefList);
8222 //        WRITE_CURBE_FIELD_TO_FILE(IsLowDelay);
8223 //        WRITE_CURBE_FIELD_TO_FILE(MaxNumMergeCand);
8224 //        WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL0);
8225 //        WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL1);
8226 //
8227 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_0);
8228 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_0);
8229 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_1);
8230 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_1);
8231 //
8232 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_2);
8233 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_2);
8234 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_3);
8235 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_3);
8236 //
8237 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_4);
8238 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_4);
8239 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_5);
8240 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_5);
8241 //
8242 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_6);
8243 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_6);
8244 //        WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_7);
8245 //        WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_7);
8246 //
8247 //        WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L0);
8248 //        WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L1);
8249 //
8250 //        WRITE_CURBE_FIELD_TO_FILE(RefFrameWinWidth);
8251 //        WRITE_CURBE_FIELD_TO_FILE(RefFrameWinHeight);
8252 //
8253 //        WRITE_CURBE_FIELD_TO_FILE(RoundingInter);
8254 //        WRITE_CURBE_FIELD_TO_FILE(RoundingIntra);
8255 //        WRITE_CURBE_FIELD_TO_FILE(MaxThreadWidth);
8256 //        WRITE_CURBE_FIELD_TO_FILE(MaxThreadHeight);
8257 //
8258 //        const char *fileName = pDebugInterface->CreateFileName(
8259 //            "_HEVCMBEnc",
8260 //            CodechalDbgBufferType::bufCurbe,
8261 //            CodechalDbgExtType::txt);
8262 //
8263 //        std::ofstream ofs(fileName, std::ios::out);
8264 //        ofs << oss.str();
8265 //        ofs.close();
8266 //
8267 //    if (m_osInterface && pEncComBuf1)
8268 //    {
8269 //        m_osInterface->pfnUnlockResource(
8270 //            m_osInterface,
8271 //            presDBuffer);
8272 //    }
8273 //
8274 //    return MOS_STATUS_SUCCESS;
8275 //}
8276 
8277 #endif
VerifyCommandBufferSize()8278 MOS_STATUS CodechalEncHevcStateG11::VerifyCommandBufferSize()
8279 {
8280     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8281 
8282     CODECHAL_ENCODE_FUNCTION_ENTER;
8283 
8284     if (UseRenderCommandBuffer() || m_numPipe == 1)
8285     {
8286         // legacy mode & resize CommandBuffer Size for every BRC pass
8287         if (!m_singleTaskPhaseSupported)
8288         {
8289             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
8290         }
8291         return eStatus;
8292     }
8293 
8294     // virtual engine
8295     uint32_t requestedSize =
8296         m_pictureStatesSize +
8297         m_extraPictureStatesSize +
8298         (m_sliceStatesSize * m_numSlices);
8299 
8300     requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
8301 
8302     // Running in the multiple VDBOX mode
8303     int currentPipe = GetCurrentPipe();
8304     if (currentPipe < 0 || currentPipe >= m_numPipe)
8305     {
8306         eStatus = MOS_STATUS_INVALID_PARAMETER;
8307         return eStatus;
8308     }
8309     int currentPass = GetCurrentPass();
8310     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8311     {
8312         eStatus = MOS_STATUS_INVALID_PARAMETER;
8313         return eStatus;
8314     }
8315 
8316     if (IsFirstPipe() && m_osInterface->bUsesPatchList)
8317     {
8318         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
8319     }
8320 
8321     PMOS_COMMAND_BUFFER cmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
8322 
8323     if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
8324         m_sizeOfVeBatchBuffer < requestedSize)
8325     {
8326         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8327 
8328         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8329         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8330         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8331         allocParamsForBufferLinear.Format = Format_Buffer;
8332         allocParamsForBufferLinear.dwBytes = requestedSize;
8333         allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
8334 
8335         if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
8336         {
8337             if (cmdBuffer->pCmdBase)
8338             {
8339                 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
8340             }
8341             m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
8342         }
8343 
8344         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8345             m_osInterface,
8346             &allocParamsForBufferLinear,
8347             &cmdBuffer->OsResource));
8348 
8349         m_sizeOfVeBatchBuffer = requestedSize;
8350     }
8351 
8352     if (cmdBuffer->pCmdBase == nullptr)
8353     {
8354         MOS_LOCK_PARAMS lockParams;
8355         MOS_ZeroMemory(&lockParams, sizeof(lockParams));
8356         lockParams.WriteOnly = true;
8357         cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
8358         cmdBuffer->iRemaining                    = m_sizeOfVeBatchBuffer;
8359         cmdBuffer->iOffset = 0;
8360 
8361         if (cmdBuffer->pCmdBase == nullptr)
8362         {
8363             eStatus = MOS_STATUS_NULL_POINTER;
8364             return eStatus;
8365         }
8366     }
8367 
8368     return eStatus;
8369 }
8370 
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)8371 MOS_STATUS CodechalEncHevcStateG11::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
8372 {
8373     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8374 
8375     CODECHAL_ENCODE_FUNCTION_ENTER;
8376 
8377     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8378     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
8379 
8380     if (UseRenderCommandBuffer() || m_numPipe == 1)
8381     {
8382         // legacy mode
8383         m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
8384         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
8385         return eStatus;
8386     }
8387 
8388     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
8389 
8390     int currentPipe = GetCurrentPipe();
8391     if (currentPipe < 0 || currentPipe >= m_numPipe)
8392     {
8393         eStatus = MOS_STATUS_INVALID_PARAMETER;
8394         return eStatus;
8395     }
8396     int currentPass = GetCurrentPass();
8397     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8398     {
8399         eStatus = MOS_STATUS_INVALID_PARAMETER;
8400         return eStatus;
8401     }
8402 
8403     *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
8404 
8405     if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
8406     {
8407         // Insert CP Prolog
8408         CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
8409         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
8410     }
8411     return eStatus;
8412 }
8413 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)8414 MOS_STATUS CodechalEncHevcStateG11::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
8415 {
8416     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8417 
8418     CODECHAL_ENCODE_FUNCTION_ENTER;
8419 
8420     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8421 
8422     if (UseRenderCommandBuffer() || m_numPipe == 1)
8423     {
8424         // legacy mode
8425         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
8426         return eStatus;
8427     }
8428 
8429     int currentPipe = GetCurrentPipe();
8430     if (currentPipe < 0 || currentPipe >= m_numPipe)
8431     {
8432         eStatus = MOS_STATUS_INVALID_PARAMETER;
8433         return eStatus;
8434     }
8435     int currentPass = GetCurrentPass();
8436     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8437     {
8438         eStatus = MOS_STATUS_INVALID_PARAMETER;
8439         return eStatus;
8440     }
8441     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
8442     m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
8443     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
8444 
8445     return eStatus;
8446 }
8447 
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)8448 MOS_STATUS CodechalEncHevcStateG11::SubmitCommandBuffer(
8449     PMOS_COMMAND_BUFFER cmdBuffer,
8450     bool                bNullRendering)
8451 {
8452     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8453 
8454     CODECHAL_ENCODE_FUNCTION_ENTER;
8455 
8456     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8457 
8458     if (UseRenderCommandBuffer() || m_numPipe == 1)
8459     {
8460         // legacy mode
8461         if (!UseRenderCommandBuffer())  // Set VE Hints for video contexts only
8462         {
8463             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
8464         }
8465         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
8466         return eStatus;
8467     }
8468 
8469     bool cmdBufferReadyForSubmit = IsLastPipe();
8470 
8471     // In STF, Hold the command buffer submission till last pass
8472     if (m_singleTaskPhaseSupported)
8473     {
8474         cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
8475     }
8476 
8477     if(!cmdBufferReadyForSubmit)
8478     {
8479         return eStatus;
8480     }
8481 
8482     int currentPass = GetCurrentPass();
8483     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8484     {
8485         eStatus = MOS_STATUS_INVALID_PARAMETER;
8486         return eStatus;
8487     }
8488     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
8489 
8490     for (uint32_t i = 0; i < m_numPipe; i++)
8491     {
8492         PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
8493 
8494         if(cmdBuffer->pCmdBase)
8495         {
8496             m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
8497         }
8498 
8499         cmdBuffer->pCmdBase = 0;
8500         cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
8501     }
8502     m_sizeOfVeBatchBuffer = 0;
8503 
8504     if(eStatus == MOS_STATUS_SUCCESS)
8505     {
8506         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
8507         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
8508     }
8509 
8510     return eStatus;
8511 }
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)8512 MOS_STATUS CodechalEncHevcStateG11::SendPrologWithFrameTracking(
8513     PMOS_COMMAND_BUFFER         cmdBuffer,
8514     bool                        frameTrackingRequested,
8515     MHW_MI_MMIOREGISTERS       *mmioRegister)
8516 {
8517     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8518 
8519     CODECHAL_ENCODE_FUNCTION_ENTER;
8520 
8521     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8522 
8523     if (UseRenderCommandBuffer())
8524     {
8525         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
8526         return eStatus;
8527     }
8528 
8529     if (!IsLastPipe())
8530     {
8531         return eStatus;
8532     }
8533 
8534     PMOS_COMMAND_BUFFER commandBufferInUse;
8535     if (m_realCmdBuffer.pCmdBase)
8536     {
8537         commandBufferInUse = &m_realCmdBuffer;
8538     }
8539     else
8540     {
8541         if (cmdBuffer && cmdBuffer->pCmdBase)
8542         {
8543             commandBufferInUse = cmdBuffer;
8544         }
8545         else
8546         {
8547             eStatus = MOS_STATUS_INVALID_PARAMETER;
8548             return eStatus;
8549         }
8550     }
8551     // initialize command buffer attributes
8552     commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
8553     commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
8554     commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
8555     commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
8556     commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
8557 
8558     if (frameTrackingRequested && m_frameTrackingEnabled)
8559     {
8560         commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
8561         commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
8562             &m_encodeStatusBuf.resStatusBuffer;
8563         commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
8564         // Set media frame tracking address offset(the offset from the encoder status buffer page)
8565         commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
8566     }
8567 
8568     MHW_GENERIC_PROLOG_PARAMS  genericPrologParams;
8569     MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
8570     genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
8571     genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
8572     genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
8573     genericPrologParams.dwStoreDataValue = m_storeData - 1;
8574 
8575     CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
8576 
8577     return eStatus;
8578 }
8579 
SetSliceStructs()8580 MOS_STATUS CodechalEncHevcStateG11::SetSliceStructs()
8581 {
8582     CODECHAL_ENCODE_FUNCTION_ENTER;
8583     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
8584     eStatus = CodechalEncodeHevcBase::SetSliceStructs();
8585     m_numPassesInOnePipe                        = m_numPasses;
8586     m_numPasses                                 = (m_numPasses + 1) * m_numPipe - 1;
8587     return eStatus;
8588 }
8589 
AllocateTileStatistics()8590 MOS_STATUS CodechalEncHevcStateG11::AllocateTileStatistics()
8591 {
8592     CODECHAL_ENCODE_FUNCTION_ENTER;
8593 
8594     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
8595 
8596     if (!m_hevcPicParams->tiles_enabled_flag)
8597     {
8598         return eStatus;
8599     }
8600 
8601     auto num_tile_rows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
8602     auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
8603     auto num_tiles = num_tile_rows*num_tile_columns;
8604 
8605     MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
8606     MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
8607     MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
8608 
8609     MOS_LOCK_PARAMS lockFlagsWriteOnly;
8610     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8611     lockFlagsWriteOnly.WriteOnly = true;
8612 
8613     // Set the maximum size based on frame level statistics.
8614     m_hevcStatsSize.uiTileSizeRecord     = CODECHAL_CACHELINE_SIZE;
8615     m_hevcStatsSize.uiHevcPakStatistics  = m_sizeOfHcpPakFrameStats;
8616     m_hevcStatsSize.uiVdencStatistics    = 0;
8617     m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
8618 
8619     // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
8620     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
8621     m_hevcFrameStatsOffset.uiTileSizeRecord     = 0;  // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
8622     m_hevcFrameStatsOffset.uiHevcPakStatistics  = 0;
8623     m_hevcFrameStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
8624     m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
8625 
8626     // Frame level statistics
8627     m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6), CODECHAL_PAGE_SIZE);
8628 
8629     // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
8630     if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
8631     {
8632         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8633         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8634         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8635         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8636         allocParamsForBufferLinear.Format = Format_Buffer;
8637         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
8638         allocParamsForBufferLinear.pBufName = "HCP Aggregated Frame Statistics Streamout Buffer";
8639 
8640         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8641             m_osInterface,
8642             &allocParamsForBufferLinear,
8643             &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
8644         m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
8645 
8646         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8647             m_osInterface,
8648             &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
8649             &lockFlagsWriteOnly);
8650 
8651         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8652         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8653         m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
8654     }
8655 
8656     // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
8657     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
8658     m_hevcTileStatsOffset.uiTileSizeRecord     = 0; // TileReord is in a separated resource
8659     m_hevcTileStatsOffset.uiHevcPakStatistics  = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer
8660     m_hevcTileStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
8661     m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
8662     // Combined statistics size for all tiles
8663     m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6, CODECHAL_PAGE_SIZE);
8664 
8665     // Tile size record size for all tiles
8666     m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
8667 
8668     if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
8669     {
8670         if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
8671         {
8672             m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
8673         }
8674         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8675         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8676         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8677         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8678         allocParamsForBufferLinear.Format = Format_Buffer;
8679         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
8680         allocParamsForBufferLinear.pBufName = "HCP Tile Level Statistics Streamout Buffer";
8681 
8682         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8683             m_osInterface,
8684             &allocParamsForBufferLinear,
8685             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
8686         m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
8687 
8688         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8689             m_osInterface,
8690             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
8691             &lockFlagsWriteOnly);
8692         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8693 
8694         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8695         m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
8696     }
8697 
8698     if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
8699     {
8700         if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
8701         {
8702             m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
8703         }
8704         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8705         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8706         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8707         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8708         allocParamsForBufferLinear.Format = Format_Buffer;
8709         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
8710         allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
8711 
8712         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8713             m_osInterface,
8714             &allocParamsForBufferLinear,
8715             &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource));
8716         m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize;
8717 
8718         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8719             m_osInterface,
8720             &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
8721             &lockFlagsWriteOnly);
8722         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8723 
8724         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8725         m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
8726     }
8727 
8728     return eStatus;
8729 }
8730 
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)8731 void CodechalEncHevcStateG11::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
8732 {
8733     CODECHAL_ENCODE_FUNCTION_ENTER;
8734 
8735     CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
8736 
8737     PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
8738     if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
8739     {
8740         pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource;
8741         pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
8742         pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
8743         pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
8744     }
8745 }
8746 
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)8747 MOS_STATUS CodechalEncHevcStateG11::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
8748 {
8749     CODECHAL_ENCODE_FUNCTION_ENTER;
8750 
8751     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
8752 
8753     if (!m_sseEnabled)
8754     {
8755         return eStatus;
8756     }
8757 
8758     // encodeStatus is offset by 2 DWs in the resource
8759     uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
8760     for (auto i = 0; i < 6; i++)    // 64 bit SSE values for luma/ chroma channels need to be copied
8761     {
8762         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
8763         MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
8764         miCpyMemMemParams.presSrc     = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
8765         miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t);    // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
8766         miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
8767         miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
8768         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
8769     }
8770 
8771     return eStatus;
8772 }
8773 
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)8774 void CodechalEncHevcStateG11::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
8775 {
8776     PCODECHAL_ENCODE_BUFFER tileRecordBuffer    = &m_tileRecordBuffer[m_virtualEngineBbIndex];
8777     bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
8778 
8779     MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
8780     indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
8781     indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
8782     indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
8783     indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
8784     indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
8785     indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
8786     indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
8787     indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
8788     indObjBaseAddrParams.dwPakTileSizeRecordOffset   = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
8789 }
8790 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)8791 MOS_STATUS CodechalEncHevcStateG11::UpdateCmdBufAttribute(
8792     PMOS_COMMAND_BUFFER cmdBuffer,
8793     bool                renderEngineInUse)
8794 {
8795     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8796 
8797     // should not be there. Will remove it in the next change
8798     CODECHAL_ENCODE_FUNCTION_ENTER;
8799     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
8800     {
8801         PMOS_CMD_BUF_ATTRI_VE attriExt =
8802             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
8803 
8804         memset((void *)attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
8805         attriExt->bUseVirtualEngineHint =
8806             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
8807     }
8808 
8809     return eStatus;
8810 }
8811 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)8812 MOS_STATUS CodechalEncHevcStateG11::SetAndPopulateVEHintParams(
8813     PMOS_COMMAND_BUFFER  cmdBuffer)
8814 {
8815     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
8816 
8817     CODECHAL_ENCODE_FUNCTION_ENTER;
8818 
8819     if (!MOS_VE_SUPPORTED(m_osInterface))
8820     {
8821         return eStatus;
8822     }
8823 
8824     CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
8825     MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
8826 
8827     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
8828     {
8829         scalSetParms.bNeedSyncWithPrevious       = true;
8830     }
8831 
8832     int32_t currentPass = GetCurrentPass();
8833     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8834     {
8835         eStatus = MOS_STATUS_INVALID_PARAMETER;
8836         return eStatus;
8837     }
8838     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
8839     if (m_numPipe >= 2)
8840     {
8841         for (auto i = 0; i < m_numPipe; i++)
8842         {
8843             scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
8844         }
8845     }
8846 
8847     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
8848     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8849     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
8850 
8851     return eStatus;
8852 }
8853 
8854 #if USE_CODECHAL_DEBUG_TOOL
DumpFrameStatsBuffer(CodechalDebugInterface * debugInterface)8855 MOS_STATUS CodechalEncHevcStateG11::DumpFrameStatsBuffer(CodechalDebugInterface* debugInterface)
8856 {
8857     CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface);
8858 
8859     PMOS_RESOURCE resBuffer = &m_resFrameStatStreamOutBuffer;
8860     uint32_t offset = 0;
8861     uint32_t num_tiles = 1;
8862     //In scalable mode, HEVC PAK Frame Statistics gets dumped out for each tile
8863     if ( m_numPipe > 1)
8864     {
8865         resBuffer  = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
8866         offset     = m_hevcTileStatsOffset.uiHevcPakStatistics;
8867         num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
8868     }
8869     uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * num_tiles, CODECHAL_CACHELINE_SIZE);
8870 
8871     CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
8872         resBuffer,
8873         CodechalDbgAttr::attrFrameState,
8874         "FrameStatus",
8875         size,
8876         offset,
8877         CODECHAL_NUM_MEDIA_STATES));
8878 
8879     return MOS_STATUS_SUCCESS;
8880 }
8881 
DumpPakOutput()8882 MOS_STATUS CodechalEncHevcStateG11::DumpPakOutput()
8883 {
8884     std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
8885 
8886     CODECHAL_DEBUG_TOOL(
8887         int32_t currentPass = GetCurrentPass();
8888         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8889             &m_resPakcuLevelStreamoutData.sResource,
8890             CodechalDbgAttr::attrCUStreamout,
8891             currPassName.data(),
8892             m_resPakcuLevelStreamoutData.dwSize,
8893             0,
8894             CODECHAL_NUM_MEDIA_STATES));
8895         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8896             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
8897             CodechalDbgAttr::attrTileBasedStats,
8898             currPassName.data(),
8899             m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
8900             0,
8901             CODECHAL_NUM_MEDIA_STATES));
8902         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8903             &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite],
8904             CodechalDbgAttr::attrBrcPakStats,
8905             currPassName.data(),
8906             m_hevcBrcPakStatisticsSize,
8907             0,
8908             CODECHAL_NUM_MEDIA_STATES));
8909         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8910             &m_HucStitchCmdBatchBuffer.OsResource,
8911             CodechalDbgAttr::attr2ndLvlBatchMfx,
8912             currPassName.data(),
8913             m_hwInterface->m_HucStitchCmdBatchBufferSize,
8914             0,
8915             CODECHAL_NUM_MEDIA_STATES));
8916         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8917             &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass],
8918             CodechalDbgAttr::attrHuCStitchDataBuf,
8919             currPassName.data(),
8920             sizeof(HucCommandData),
8921             0,
8922             CODECHAL_NUM_MEDIA_STATES));
8923         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
8924             &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
8925             sizeof(HucPakStitchDmemEncG11),
8926             currentPass,
8927             hucRegionDumpPakIntegrate));
8928     )
8929 
8930     return MOS_STATUS_SUCCESS;
8931 }
8932 #endif
8933 
EncodeMeKernel()8934 MOS_STATUS CodechalEncHevcStateG11::EncodeMeKernel()
8935 {
8936     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8937 
8938     CODECHAL_ENCODE_FUNCTION_ENTER;
8939 
8940     // Walker must be used for HME call and scaling one
8941     CODECHAL_ENCODE_ASSERT(m_hwWalker);
8942 
8943     if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled())
8944     {
8945         CodechalKernelHme::CurbeParam curbeParam;
8946         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbeParams(curbeParam));
8947 
8948         CodechalKernelHme::SurfaceParams surfaceParam;
8949         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeSurfaceParams(surfaceParam));
8950 
8951         m_hmeKernel->setnoMEKernelForPFrame(m_lowDelay);
8952 
8953         if (m_hmeKernel->Is16xMeEnabled())
8954         {
8955             if (m_hmeKernel->Is32xMeEnabled())
8956             {
8957                 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb32x;
8958                 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb32x;
8959                 surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
8960                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x));
8961             }
8962             surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb16x;
8963             surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
8964             surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
8965             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x));
8966         }
8967         surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb4x;
8968         surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb4x;
8969         surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset;
8970         curbeParam.brcEnable = m_brcEnabled;
8971         curbeParam.sumMVThreshold = m_sumMVThreshold;
8972         surfaceParam.meSumMvandDistortionBuffer = m_mvAndDistortionSumSurface;
8973         m_lastTaskInPhase = true;
8974 
8975         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x));
8976     }
8977 
8978     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::DumpHMESurfaces());
8979 
8980     return eStatus;
8981 }
8982