1 /*
2 * Copyright (c) 2017-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_encode_hevc_mbenc_g12.cpp
24 //! \brief    HEVC dual-pipe encoder mbenc kernels for GEN12.
25 //!
26 
27 #include "codechal_encode_hevc_mbenc_g12.h"
28 #include "codechal_encode_hevc_brc_g12.h"
29 #include "mhw_vdbox_hcp_g12_X.h"
30 #include "codechal_kernel_hme_mdf_g12.h"
31 #include "codechal_kernel_header_g12.h"
32 #include "Gen12_HEVC_B_LCU32.h"
33 #include "Gen12_HEVC_B_LCU64.h"
34 #include "cm_wrapper.h"
35 
36 #include "Gen12_HEVC_BRC_INIT.h"
37 #include "Gen12_HEVC_BRC_RESET.h"
38 #include "Gen12_HEVC_BRC_UPDATE.h"
39 #include "Gen12_HEVC_BRC_LCUQP.h"
40 #include "Gen12LP_CoarseIntra_genx.h"
41 #include "Gen12LP_WeightedPrediction_genx.h"
42 
43 #if USE_PROPRIETARY_CODE
44 #include "cm_device_rt.h"
45 #endif
46 
47 #if MOS_MEDIASOLO_SUPPORTED
48 #include "mos_os_solo.h"
#endif // MOS_MEDIASOLO_SUPPORTED
50 
CodecHalHevcMbencG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)51 CodecHalHevcMbencG12::CodecHalHevcMbencG12(CodechalHwInterface* hwInterface,
52     CodechalDebugInterface* debugInterface,
53     PCODECHAL_STANDARD_INFO standardInfo)
54     : CodechalEncHevcStateG12(hwInterface, debugInterface, standardInfo)
55 {
56     m_useMdf = true;
57     for (int32_t idx = 0; idx < MAX_VME_FWD_REF + MAX_VME_BWD_REF; idx++)
58     {
59         m_surfRefArray[idx] = nullptr;
60         m_surf2XArray[idx]  = nullptr;
61     }
62 
63 }
64 
~CodecHalHevcMbencG12()65 CodecHalHevcMbencG12::~CodecHalHevcMbencG12() {
66     CODECHAL_ENCODE_FUNCTION_ENTER;
67 
68     if (m_wpState)
69     {
70         MOS_Delete(m_wpState);
71         m_wpState = nullptr;
72     }
73 
74     if (m_intraDistKernel)
75     {
76         MOS_Delete(m_intraDistKernel);
77         m_intraDistKernel = nullptr;
78     }
79 
80     if (m_hmeKernel)
81     {
82         MOS_Delete(m_hmeKernel);
83         m_hmeKernel = nullptr;
84     }
85 
86     if (m_swScoreboardState)
87     {
88         MOS_Delete(m_swScoreboardState);
89         m_swScoreboardState = nullptr;
90     }
91 
92     DestroyMDFResources();
93 }
94 
AllocateEncResources()95 MOS_STATUS CodecHalHevcMbencG12::AllocateEncResources()
96 {
97     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
98 
99     CODECHAL_ENCODE_FUNCTION_ENTER;
100 
101     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG12::AllocateEncResources());
102 
103     CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
104 
105     if (m_hmeSupported)
106     {
107         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
108     }
109     // Intermediate CU Record Surface
110     if (!m_intermediateCuRecordLcu32)
111     {
112         //MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE
113         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
114             m_widthAlignedLcu32,
115             m_heightAlignedLcu32 >> 1,
116             Format_A8,
117             m_intermediateCuRecordLcu32));
118     }
119 
120     // Scratch Surface
121     if (!m_scratchSurf)
122     {
123         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
124             m_widthAlignedLcu32 >> 3,
125             m_heightAlignedLcu32 >> 5,
126             Format_A8,
127             m_scratchSurf));
128     }
129 
130     // Enc constant table for B
131     if (!m_constTableB)
132     {
133         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
134             m_encConstantDataLutSize,
135             m_constTableB));
136     }
137 
138     // Load Balance surface size
139     if (!m_loadBalance)
140     {
141         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
142             m_threadMapSize,
143             m_loadBalance));
144     }
145 
146     //Debug surface
147     if (!m_dbgSurface)
148     {
149         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
150             m_debugSurfaceSize,
151             m_dbgSurface));
152     }
153 
154     return MOS_STATUS_SUCCESS;
155 }
156 
157 
AllocateMeResources()158 MOS_STATUS CodecHalHevcMbencG12::AllocateMeResources()
159 {
160     CODECHAL_ENCODE_FUNCTION_ENTER;
161     if (m_hmeSupported)
162     {
163         // BRC Distortion Surface
164         if (!m_brcBuffers.meBrcDistortionSurface)
165         {
166             uint32_t width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
167             uint32_t height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
168 
169             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
170                 width,
171                 height,
172                 Format_A8,
173                 m_brcBuffers.meBrcDistortionSurface));
174         }
175 
176         // MV and Distortion Summation Surface
177         if (!m_brcBuffers.mvAndDistortionSumSurface)
178         {
179             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
180                 m_mvdistSummationSurfSize,
181                 m_brcBuffers.mvAndDistortionSumSurface));
182             CmEvent *event = nullptr;
183             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_brcBuffers.mvAndDistortionSumSurface->InitSurface(0, event));
184         }
185     }
186     return MOS_STATUS_SUCCESS;
187 }
188 
AllocateBrcResources()189 MOS_STATUS CodecHalHevcMbencG12::AllocateBrcResources()
190 {
191     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::AllocateBrcResources());
192 
193     // BRC Intra Distortion Surface
194     uint32_t width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
195     uint32_t height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
196 
197     if (!m_brcBuffers.brcIntraDistortionSurface)
198     {
199         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
200             width,
201             height,
202             Format_A8,
203             m_brcBuffers.brcIntraDistortionSurface));
204     }
205 
206     return MOS_STATUS_SUCCESS;
207 }
208 
FreeBrcResources()209 MOS_STATUS CodecHalHevcMbencG12::FreeBrcResources()
210 {
211     CODECHAL_ENCODE_FUNCTION_ENTER;
212 
213     CodechalEncHevcState::FreeBrcResources();
214 
215     if (m_brcBuffers.brcIntraDistortionSurface)
216     {
217         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_brcBuffers.brcIntraDistortionSurface))
218     }
219 
220     return MOS_STATUS_SUCCESS;
221 }
222 
FreeMeResources()223 MOS_STATUS CodecHalHevcMbencG12::FreeMeResources()
224 {
225     CODECHAL_ENCODE_FUNCTION_ENTER;
226 
227     if (m_brcBuffers.meBrcDistortionSurface)
228     {
229         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_brcBuffers.meBrcDistortionSurface))
230     }
231 
232     if (m_brcBuffers.mvAndDistortionSumSurface)
233     {
234         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_brcBuffers.mvAndDistortionSumSurface));
235     }
236 
237     return MOS_STATUS_SUCCESS;
238 }
239 
FreeEncResources()240 MOS_STATUS CodecHalHevcMbencG12::FreeEncResources()
241 {
242     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
243 
244     CODECHAL_ENCODE_FUNCTION_ENTER;
245 
246     CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources());
247 
248     if (m_intermediateCuRecordLcu32)
249     {
250         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_intermediateCuRecordLcu32));
251         m_intermediateCuRecordLcu32 = nullptr;
252     }
253     if (m_scratchSurf)
254     {
255         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_scratchSurf));
256         m_scratchSurf = nullptr;
257     }
258     if (m_cu16X16QpIn)
259     {
260         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_cu16X16QpIn));
261         m_cu16X16QpIn = nullptr;
262     }
263     if (m_constTableB)
264     {
265         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_constTableB));
266         m_constTableB = nullptr;
267     }
268     if (m_cuSplitSurf)
269     {
270         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_cuSplitSurf));
271         m_cuSplitSurf = nullptr;
272     }
273     if (m_loadBalance)
274     {
275         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_loadBalance));
276         m_loadBalance = nullptr;
277     }
278     if (m_dbgSurface)
279     {
280         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_dbgSurface));
281         m_dbgSurface = nullptr;
282     }
283 
284     if (m_lcuLevelData)
285     {
286         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_lcuLevelData));
287         m_lcuLevelData = nullptr;
288     }
289     if (m_reconWithBoundaryPix)
290     {
291         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_reconWithBoundaryPix));
292         m_reconWithBoundaryPix = nullptr;
293     }
294 
295     //container surfaces
296     if (m_curSurf)
297     {
298         m_curSurf->NotifyUmdResourceChanged(nullptr);
299         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_curSurf));
300         m_curSurf = nullptr;
301     }
302     if (m_mbCodeBuffer)
303     {
304         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_mbCodeBuffer));
305         m_mbCodeBuffer = nullptr;
306     }
307     if (m_swScoreboardSurf)
308     {
309         m_swScoreboardSurf->NotifyUmdResourceChanged(nullptr);
310         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_swScoreboardSurf));
311         m_swScoreboardSurf = nullptr;
312     }
313     if (m_curSurf2X)
314     {
315         m_curSurf2X->NotifyUmdResourceChanged(nullptr);
316         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_curSurf2X));
317         m_curSurf2X = nullptr;
318     }
319     if (m_histInBuffer)
320     {
321         m_histInBuffer->NotifyUmdResourceChanged(nullptr);
322         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_histInBuffer));
323         m_histInBuffer = nullptr;
324     }
325     if (m_histOutBuffer)
326     {
327         m_histOutBuffer->NotifyUmdResourceChanged(nullptr);
328         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_histOutBuffer));
329         m_histOutBuffer = nullptr;
330     }
331     for (int32_t idx = 0; idx < MAX_VME_FWD_REF + MAX_VME_BWD_REF; idx++)
332     {
333         if (m_surfRefArray[idx])
334         {
335             m_surfRefArray[idx]->NotifyUmdResourceChanged(nullptr);
336             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_surfRefArray[idx]));
337             m_surfRefArray[idx] = nullptr;
338         }
339         if (m_surf2XArray[idx])
340         {
341             m_surf2XArray[idx]->NotifyUmdResourceChanged(nullptr);
342             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_surf2XArray[idx]));
343             m_surf2XArray[idx] = nullptr;
344         }
345     }
346 
347     //Free MDF objects
348     if (m_cmKrnB)
349     {
350         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyKernel(m_cmKrnB));
351         m_cmKrnB = nullptr;
352     }
353     if (m_cmKrnB64)
354     {
355         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyKernel(m_cmKrnB64));
356         m_cmKrnB64 = nullptr;
357     }
358     if (m_cmProgramB)
359     {
360         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyProgram(m_cmProgramB));
361         m_cmProgramB = nullptr;
362     }
363     if (m_cmProgramB64)
364     {
365         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyProgram(m_cmProgramB64));
366         m_cmProgramB64 = nullptr;
367     }
368     if (m_hevcBrcG12)
369     {
370         MOS_Delete(m_hevcBrcG12);
371     }
372 
373     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG12::FreeEncResources());
374 
375     if (m_threadSpace)
376     {
377         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyThreadSpace(m_threadSpace));
378         m_threadSpace = nullptr;
379     }
380 
381     return eStatus;
382 }
383 
AllocateMDFResources()384 MOS_STATUS CodecHalHevcMbencG12::AllocateMDFResources()
385 {
386     uint32_t devOp = CM_DEVICE_CREATE_OPTION_SCRATCH_SPACE_DISABLE | CM_DEVICE_CONFIG_FAST_PATH_ENABLE;
387 
388     if (!m_mfeEnabled)
389     {
390         //create CM device
391         if (!m_cmDev)
392         {
393             CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
394             m_osInterface->pfnNotifyStreamIndexSharing(m_osInterface);
395             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateCmDevice(
396                 m_osInterface->pOsContext,
397                 m_cmDev,
398                 devOp,
399                 CM_DEVICE_CREATE_PRIORITY_DEFAULT));
400         }
401 
402         if (!m_surfIndexArray)
403         {
404             m_surfIndexArray = (MBencSurfaceIndex *) new (std::nothrow) (SurfaceIndex [m_maxMfeSurfaces][m_maxMultiFrames]);
405             CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfIndexArray);
406         }
407     }
408     else
409     {
410         //create CM device
411         if (!m_cmDev)
412         {
413             if (!m_mfeEncodeSharedState->pCmDev)
414             {
415                 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
416                 m_osInterface->pfnNotifyStreamIndexSharing(m_osInterface);
417                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateCmDevice(
418                     m_osInterface->pOsContext,
419                     m_cmDev,
420                     devOp,
421                     CM_DEVICE_CREATE_PRIORITY_DEFAULT));
422 
423                 m_mfeEncodeSharedState->pCmDev = m_cmDev;
424             }
425             else
426             {
427                 m_cmDev = m_mfeEncodeSharedState->pCmDev;
428             }
429         }
430 
431         if (!m_mfeEncodeSharedState->commonSurface)
432         {
433             m_surfIndexArray = (MBencSurfaceIndex *) new (std::nothrow) ( SurfaceIndex [m_maxMfeSurfaces][m_maxMultiFrames]);
434             CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfIndexArray);
435             m_mfeEncodeSharedState->commonSurface = reinterpret_cast<SurfaceIndex *>(m_surfIndexArray);
436         }
437         else
438         {
439             m_surfIndexArray = reinterpret_cast<MBencSurfaceIndex *>(m_mfeEncodeSharedState->commonSurface);
440         }
441 
442         if (!m_mfeEncodeSharedState->maxThreadWidthFrames)
443         {
444             m_mfeEncodeSharedState->maxThreadWidthFrames  = MOS_NewArray(uint32_t, m_maxMultiFrames);
445             CODECHAL_ENCODE_CHK_NULL_RETURN(m_mfeEncodeSharedState->maxThreadWidthFrames);
446         }
447     }
448 
449     //create CM Queue
450     if (!m_cmQueue)
451     {
452         CM_QUEUE_CREATE_OPTION queueCreateOption = CM_DEFAULT_QUEUE_CREATE_OPTION;
453         if (m_computeContextEnabled)
454         {
455             queueCreateOption.QueueType = CM_QUEUE_TYPE_COMPUTE;
456         }
457         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateQueueEx(m_cmQueue, queueCreateOption));
458     }
459 
460     //create CM task
461     if (!m_cmTask)
462     {
463         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateTask(m_cmTask));
464     }
465 
466     return MOS_STATUS_SUCCESS;
467 }
468 
DestroyMDFResources()469 MOS_STATUS CodecHalHevcMbencG12::DestroyMDFResources()
470 {
471     if (m_cmDev && m_cmTask)
472     {
473         m_cmDev->DestroyTask(m_cmTask);
474         m_cmTask = nullptr;
475     }
476 
477     if (!m_mfeEnabled)
478     {
479         delete[] m_surfIndexArray;
480         m_surfIndexArray = nullptr;
481         if (m_osInterface != nullptr)
482         {
483             m_osInterface->pfnDestroyCmDevice(m_cmDev);
484             m_cmDev = nullptr;
485         }
486     }
487     else
488     {
489         if (m_mfeLastStream)
490         {
491             MOS_DeleteArray(m_mfeEncodeSharedState->maxThreadWidthFrames);
492             m_mfeEncodeSharedState->maxThreadWidthFrames = nullptr;
493 
494             delete[] m_surfIndexArray;
495             m_surfIndexArray = nullptr;
496             m_mfeEncodeSharedState->commonSurface = nullptr;
497             if (m_osInterface != nullptr)
498             {
499                 m_osInterface->pfnDestroyCmDevice(m_cmDev);
500                 m_mfeEncodeSharedState->pCmDev = m_cmDev = nullptr;
501             }
502         }
503         else
504         {
505             m_surfIndexArray = nullptr;
506             m_cmDev = nullptr;
507         }
508     }
509 
510     return MOS_STATUS_SUCCESS;
511 }
512 
InitKernelState()513 MOS_STATUS CodecHalHevcMbencG12::InitKernelState()
514 {
515     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
516 
517     CODECHAL_ENCODE_FUNCTION_ENTER;
518 
519     m_colorBitMfeEnabled = m_mfeEnabled ? true : false;
520 
521     // Create weighted prediction kernel state
522     m_wpState = MOS_New(CodechalEncodeWPMdfG12, this);
523     CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState);
524     CODECHAL_ENCODE_CHK_STATUS_RETURN(((CodechalEncodeWPMdfG12 *)m_wpState)->InitKernelStateIsa((void *)GEN12LP_WEIGHTEDPREDICTION_GENX, GEN12LP_WEIGHTEDPREDICTION_GENX_SIZE));
525 
526     // create intra distortion kernel
527     m_intraDistKernel = MOS_New(CodechalKernelIntraDistMdfG12, this);
528     CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel);
529     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->InitializeKernelIsa(
530         (void*)GEN12LP_COARSEINTRA_GENX,
531         GEN12LP_COARSEINTRA_GENX_SIZE));
532 
533     // Create SW scoreboard init kernel state
534     CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardMdfG12, this));
535     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState());
536     // Create Hme kernel
537     m_hmeKernel = MOS_New(CodechalKernelHmeMdfG12, this);
538     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel)
539 
540     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->LoadProgram((void *)GEN12_HEVC_B_LCU32,
541         GEN12_HEVC_B_LCU32_SIZE,
542         m_cmProgramB,
543         "-nojitter"));
544 
545     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateKernel(m_cmProgramB,
546         "Gen12_HEVC_Enc_B",
547         m_cmKrnB));
548 
549     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->LoadProgram((void *)GEN12_HEVC_B_LCU64,
550         GEN12_HEVC_B_LCU64_SIZE,
551         m_cmProgramB64,
552         "-nojitter"));
553 
554     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateKernel(m_cmProgramB64,
555         "Gen12_HEVC_Enc_LCU64_B",
556         m_cmKrnB64));
557 
558     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hevcBrcG12 = MOS_New(CodecHalHevcBrcG12, this));
559     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->InitBrcKernelState());
560     return eStatus;
561 }
562 
GetMaxBtCount()563 uint32_t CodecHalHevcMbencG12::GetMaxBtCount()
564 {
565     uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
566     uint32_t btCountPhase2 = btIdxAlignment;
567     return btCountPhase2;
568 }
569 
SetupKernelArgsB()570 MOS_STATUS CodecHalHevcMbencG12::SetupKernelArgsB()
571 {
572     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
573 
574     //Setup surfaces
575     //Setup first combined 1D surface
576     int idx = 0;
577     SurfaceIndex *surfIndex = nullptr;
578     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_combinedBuffer1->GetIndex(surfIndex));
579 
580     CODECHAL_ENCODE_CHK_NULL_RETURN(surfIndex);
581     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
582 
583     //Setup second combined 1D surface
584     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_combinedBuffer2->GetIndex(surfIndex));
585     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
586 
587     //VME Surface
588     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_curVme;
589 
590     //Curr Pic
591     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_curSurf->GetIndex(surfIndex));
592     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
593 
594     //Recon surface with populated boundary pixels.
595     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_reconWithBoundaryPix->GetIndex(surfIndex));
596     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
597 
598     //Intermediate CU Record Surface for I and B kernel
599     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intermediateCuRecordLcu32->GetIndex(surfIndex));
600     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
601 
602     // PAK object command surface
603     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_mbCodeSurfIdx;
604 
605     // CU packet for PAK surface
606     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_mvDataSurfIdx;
607 
608     //Software Scoreboard surface
609     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardSurf->GetIndex(surfIndex));
610     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
611 
612     // CU 16x16 QP data input surface
613     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cu16X16QpIn->GetIndex(surfIndex));
614     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
615 
616     // Lcu level data input
617     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_lcuLevelData->GetIndex(surfIndex));
618     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
619 
620     //ColocatedCUMVDataSurface
621     if (m_colocCumvData)
622     {
623         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_colocCumvData->GetIndex(surfIndex));
624     }
625     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
626 
627     //HMEMotionPredDataSurface
628     if (m_hmeMotionPredData)
629     {
630         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeMotionPredData->GetIndex(surfIndex));
631     }
632     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
633 
634     if (m_isMaxLcu64)
635     {
636         if (m_curSurf2X)
637         {
638             (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_cur2XVme;
639 
640         }
641         else
642         {
643             (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
644         }
645 
646     }
647 
648 
649     // Kernel debug surface
650     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dbgSurface->GetIndex(surfIndex));
651     (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
652 
653     //Init all the surfaces with dummy value
654     if ((!m_mfeEnabled) || (m_mfeFirstStream))
655     {
656         for (int i = 0; i < m_maxMfeSurfaces; i++)
657         {
658             for (int j = 1; j < m_maxMultiFrames; j++)
659             {
660                 (*m_surfIndexArray)[i][j] = (*m_surfIndexArray)[i][0];
661             }
662         }
663     }
664 
665     if ((m_mfeLastStream) || (!m_mfeEnabled))
666     {
667         CmKernel *cmKrn = nullptr;
668         if (m_isMaxLcu64)
669         {
670             cmKrn = m_cmKrnB64;
671         }
672         else
673         {
674             cmKrn = m_cmKrnB;
675         }
676 
677         //Setup surfaces
678         //Setup first combined 1D surface
679         int idx = 0;
680         int commonIdx = 0;
681         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
682 
683         //Setup second combined 1D surface
684         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
685 
686         //VME Surface
687         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
688 
689         //Curr Pic
690         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
691 
692         //Recon surface with populated boundary pixels.
693         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
694 
695         //Intermediate CU Record Surface for I and B kernel
696         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
697 
698         // PAK object command surface
699         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
700 
701         // CU packet for PAK surface
702         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
703 
704         //Software Scoreboard surface
705         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
706 
707         // CU 16x16 QP data input surface
708         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
709 
710         // Lcu level data input
711         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
712 
713         //ColocatedCUMVDataSurface
714         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
715 
716         //HMEMotionPredDataSurface
717         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
718 
719         if (m_isMaxLcu64)
720         {
721             CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
722         }
723 
724         //Enc const table
725         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_constTableB->GetIndex(surfIndex));
726         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
727 
728         //load Balance surface
729         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_loadBalance->GetIndex(surfIndex));
730         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
731 
732         //reserved entries
733         if (!m_isMaxLcu64)
734         {
735             CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
736             CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
737         }
738 
739         // Kernel debug surface
740         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
741     }
742 
743     return eStatus;
744 }
745 
SetColorBitRemap(uint8_t * remapTable,int32_t multiFrameNumber,int32_t curColor,int32_t * totalColor,int32_t * totalFrameAdj)746 void CodecHalHevcMbencG12::SetColorBitRemap(uint8_t * remapTable, int32_t multiFrameNumber, int32_t curColor, int32_t * totalColor, int32_t * totalFrameAdj)
747 {
748     if (multiFrameNumber == 1)
749     {
750         *totalColor = curColor;
751         uint8_t * curColorLOC;
752         for (int32_t i = 0; i < *totalColor; i++)
753         {
754             curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
755 
756             curColorLOC[m_frameColorMapLocCurFrame] = static_cast<uint8_t>(multiFrameNumber - 1);
757             curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(i);
758             curColorLOC[m_frameColorMapLocTotalFrame] = 0;
759             curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
760         }
761     }
762     else if (multiFrameNumber == 2)
763     {
764         *totalColor = ((curColor + 1) >> 1) << 2;
765         uint8_t * curColorLOC;
766         int32_t n1 = 0;
767         int32_t n2 = 0;
768         for (int32_t i = 0; i < *totalColor; i++)
769         {
770             curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
771             curColorLOC[m_frameColorMapLocCurFrame] = i & 0x1;
772 
773             if (curColorLOC[m_frameColorMapLocCurFrame] == 0)
774             {
775                 if (n1 >= curColor)
776                     curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
777                 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(n1);
778                 n1++;
779             }
780             else if (curColorLOC[m_frameColorMapLocCurFrame] == 1)
781             {
782                 if (n2 >= curColor)
783                     curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
784                 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(n2);
785                 n2++;
786             }
787             curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
788             if ((n1 >= curColor) && (n2 >= curColor))
789             {
790                 *totalColor = i + 1;
791                 break;
792             }
793         }
794         for (int32_t i = 0; i < *totalColor; i++)
795         {
796             curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
797             curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
798             if (curColorLOC[m_frameColorMapLocCurFrame] != m_frameColorMapFrameInvalid)
799                 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(totalFrameAdj[curColorLOC[m_frameColorMapLocCurFrame]]);
800             else
801                 curColorLOC[m_frameColorMapLocTotalFrame] = 0;
802         }
803     }
804     else if (multiFrameNumber == 3)
805     {
806         *totalColor = curColor << 2;
807         uint8_t * curColorLOC;
808         int32_t n1 = 0;
809         int32_t n2 = 0;
810         for (int32_t i = 0; i < *totalColor; i++)
811         {
812             curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
813             if ((i & 3) < 2)
814             {
815                 curColorLOC[m_frameColorMapLocCurFrame] = i & 0x3;
816                 if (n1 >= curColor)
817                     curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
818                 curColorLOC[m_frameColorMapLocCurColor] = i >> 2;
819                 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(multiFrameNumber);
820                 if ((i & 3) == 1)
821                     n1++;
822             }
823             else
824             {
825                 curColorLOC[m_frameColorMapLocCurFrame] = 2;
826                 if (n2 >= curColor)
827                     curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
828                 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(n2);
829                 n2++;
830             }
831 
832             if ((n1 >= curColor) && (n2 >= curColor))
833             {
834                 *totalColor = i + 1;
835                 break;
836             }
837         }
838         for (int32_t i = 0; i < *totalColor; i++)
839         {
840             curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
841             curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
842             if (curColorLOC[m_frameColorMapLocCurFrame] != m_frameColorMapFrameInvalid)
843                 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(totalFrameAdj[curColorLOC[m_frameColorMapLocCurFrame]]);
844             else
845                 curColorLOC[m_frameColorMapLocTotalFrame] = 0;
846         }
847     }
848     else if (multiFrameNumber == 4)
849     {
850         *totalColor = curColor << 2;
851         uint8_t * curColorLOC;
852         for (int32_t i = 0; i < *totalColor; i++)
853         {
854             curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
855 
856             curColorLOC[m_frameColorMapLocCurFrame] = i & 0x3;
857             curColorLOC[m_frameColorMapLocCurColor] = i >> 2;
858             curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(totalFrameAdj[curColorLOC[m_frameColorMapLocCurFrame]]);
859             curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
860         }
861     }
862     else
863     {
864         CODECHAL_ENCODE_ASSERTMESSAGE(" Error: MultiFrameNumber , not supported!");
865     }
866 
867     return;
868 }
869 
EncodeMbEncKernel(CODECHAL_MEDIA_STATE_TYPE encFunctionType)870 MOS_STATUS CodecHalHevcMbencG12::EncodeMbEncKernel(
871     CODECHAL_MEDIA_STATE_TYPE   encFunctionType)
872 {
873     CODECHAL_ENCODE_FUNCTION_ENTER;
874     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
875 
876     uint32_t                        walkerResolutionX, walkerResolutionY, maxthreadWidth, maxthreadHeight;
877 
878     PerfTagSetting perfTag;
879     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
880 
881     CmKernel *cmKrn = nullptr;
882     if (m_isMaxLcu64)
883     {
884         cmKrn             = m_cmKrnB64;
885         if (m_hevcSeqParams->TargetUsage == 1)
886         {
887             walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
888             walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
889         }
890         else
891         {
892             walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6);
893             walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6);
894         }
895     }
896     else
897     {
898         cmKrn             = m_cmKrnB;
899         walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
900         walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
901     }
902 
903     if (m_numberConcurrentGroup > 1)
904     {
905         if (m_degree45Needed)
906         {
907             maxthreadWidth  = walkerResolutionX;
908             maxthreadHeight = walkerResolutionX + (walkerResolutionX + walkerResolutionY + m_numberConcurrentGroup - 2) / m_numberConcurrentGroup;
909         }
910         else //for tu4 we ensure threadspace width and height is even or a multiple of 4
911         {
912             maxthreadWidth  = (walkerResolutionX + 1) & 0xfffe; //ensuring width is even
913             maxthreadHeight = ((walkerResolutionX + 1) >> 1) + (walkerResolutionX + 2 * (((walkerResolutionY + 3) & 0xfffc) - 1) + (2 * m_numberConcurrentGroup - 1)) / (2 * m_numberConcurrentGroup);
914         }
915 
916         maxthreadHeight *= m_numberEncKernelSubThread;
917         maxthreadHeight += 1;
918     }
919     else
920     {
921         maxthreadWidth = walkerResolutionX;
922         maxthreadHeight = walkerResolutionY;
923         maxthreadHeight *= m_numberEncKernelSubThread;
924     }
925 
926     // Generate Lcu Level Data
927     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx]));
928 
929     // Generate Concurrent Thread Group Data
930     uint32_t    curIdx = m_currRecycledBufIdx;
931     CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData(m_encBCombinedBuffer1[curIdx].sResource));
932 
933     if (m_mfeEnabled)
934     {
935         if (m_mfeEncodeSharedState->maxTheadWidth < maxthreadWidth)
936         {
937             m_mfeEncodeSharedState->maxTheadWidth = maxthreadWidth;
938         }
939         if (m_mfeEncodeSharedState->maxTheadHeight < maxthreadHeight)
940         {
941             m_mfeEncodeSharedState->maxTheadHeight = maxthreadHeight;
942         }
943 
944         m_mfeEncodeSharedState->maxThreadWidthFrames[m_mfeEncodeParams.submitIndex] = maxthreadWidth;
945         m_mfeLastStream = (m_mfeEncodeParams.submitIndex == m_mfeEncodeParams.submitNumber - 1);
946         m_mfeFirstStream = (m_mfeEncodeParams.submitIndex == 0);
947 
948         if (m_mfeLastStream)
949         {
950             for (uint32_t i = 0; i < m_mfeEncodeParams.submitNumber; i++)
951             {
952                 m_totalFrameAdj[i] = m_mfeEncodeSharedState->maxTheadWidth - m_mfeEncodeSharedState->maxThreadWidthFrames[i];
953             }
954         }
955     }
956 
957     int32_t totalColor = m_numberConcurrentGroup;
958     if ((!m_mfeEnabled) || (m_mfeLastStream))
959     {
960         SetColorBitRemap(m_FrameBalance, m_mfeEncodeParams.submitNumber, m_numberConcurrentGroup, &totalColor, m_totalFrameAdj);
961     }
962 
963     m_mbCodeIdxForTempMVP = 0xFF;
964     if (m_pictureCodingType == I_TYPE || m_hevcSeqParams->sps_temporal_mvp_enable_flag == false)
965     {
966         // No temoporal MVP in the I frame
967         m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
968     }
969     else
970     {
971         if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF &&
972             m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
973         {
974             uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
975             // ref must be valid, ref list has max 127 entries
976             if (frameIdx < 0x7F && m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].PicEntry != 0xFF)
977             {
978                 m_mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
979             }
980         }
981         if (m_mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
982         {
983             // Temporal reference MV index is invalid and so disable the temporal MVP
984             CODECHAL_ENCODE_ASSERT(false);
985             m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
986         }
987     }
988 
989     if (m_mfeEnabled && m_mfeLastStream)
990     {
991         //update the TS variables before submitting the kernels
992         maxthreadWidth = m_mfeEncodeSharedState->maxTheadWidth;
993         maxthreadHeight = m_mfeEncodeSharedState->maxTheadHeight;
994     }
995 
996     if ((!m_mfeEnabled) || (m_mfeLastStream))
997     {
998         uint32_t threadCount = maxthreadWidth * maxthreadHeight * m_numberConcurrentGroup;
999         CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetThreadCount(threadCount));
1000     }
1001 
1002     // setup curbe, setup surfaces and send all kernel args
1003     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitCurbeDataB());
1004     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupSurfacesB());
1005     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupKernelArgsB());
1006 
1007     if (m_mfeEnabled && (!m_mfeLastStream))
1008     {
1009         //Only last stream need to submit the kernels.
1010         return eStatus;
1011     }
1012 
1013     if (m_threadSpace != nullptr && m_resolutionChanged)
1014     {
1015         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyThreadSpace(m_threadSpace));
1016         m_threadSpace = nullptr;
1017     }
1018 
1019     if (m_threadSpace == nullptr)
1020     {
1021         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateThreadSpace(
1022                                               maxthreadWidth,
1023                                               maxthreadHeight,
1024                                               m_threadSpace));
1025 
1026         m_threadSpace->SetThreadSpaceColorCount(totalColor);
1027     }
1028 
1029     switch (m_swScoreboardState->GetDependencyPattern())
1030     {
1031     case dependencyWavefront26Degree:
1032         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26));
1033         break;
1034     case dependencyWavefront26ZDegree:
1035         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26ZIG));
1036         break;
1037     case dependencyWavefront26DDegree:
1038         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26D));
1039         break;
1040     case dependencyWavefront26XDDegree:
1041         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26XD));
1042         break;
1043     case dependencyWavefront45XDDegree:
1044         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT45XD_2));
1045         break;
1046     case dependencyWavefront45DDegree:
1047         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT45D));
1048         break;
1049     case dependencyWavefront45Degree:
1050         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT));
1051         break;
1052     case dependencyWavefront26XDegree:
1053         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26X));
1054         break;
1055     case dependencyWavefront26XDegreeAlt:
1056         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26XALT));
1057         break;
1058     default:
1059         CODECHAL_ENCODE_ASSERTMESSAGE("Walking pattern is not supported right now");
1060         eStatus = MOS_STATUS_INVALID_PARAMETER;
1061         return eStatus;
1062     }
1063 
1064     CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->AssociateThreadSpace(m_threadSpace));
1065 
1066     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmTask->AddKernel(cmKrn));
1067 
1068     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1069     {
1070         CmEvent * event = CM_NO_EVENT;
1071         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmQueue->EnqueueFast(m_cmTask, event));
1072 
1073         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmTask->Reset());
1074         m_lastTaskInPhase = false;
1075     }
1076     else
1077     {
1078         m_cmTask->AddSync();
1079     }
1080 
1081     CODECHAL_DEBUG_TOOL(
1082         CODEC_REF_LIST currRefList = *(m_refList[m_currReconstructedPic.FrameIdx]);
1083         currRefList.RefPic = m_currOriginalPic;
1084 
1085         m_debugInterface->m_currPic            = m_currOriginalPic;
1086         m_debugInterface->m_bufferDumpFrameNum = m_storeData;
1087         m_debugInterface->m_frameType          = m_pictureCodingType;
1088 
1089         DumpMbEncPakOutput(&currRefList, m_debugInterface);
1090         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1091             &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1092             CodechalDbgAttr::attrOutput,
1093             "HistoryOut",
1094             m_historyOutBufferSize,
1095             m_historyOutBufferOffset,
1096             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1097         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1098             &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1099             CodechalDbgAttr::attrOutput,
1100             "CombinedBuffer2",
1101             m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
1102             0,
1103             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1104         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1105             m_swScoreboardState->GetCurSwScoreboardSurface(),
1106             CodechalDbgAttr::attrOutput,
1107             "SBoutSurface",
1108             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1109     )
1110 
1111 #if 0 // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them.
1112     {
1113         //CODECHAL_DEBUG_TOOL(
1114         currRefList = *(m_refList[m_currReconstructedPic.FrameIdx]);
1115         currRefList.RefPic = m_currOriginalPic;
1116 
1117         m_debugInterface->m_currPic = m_currOriginalPic;
1118         m_debugInterface->m_bufferDumpFrameNum = m_storeData;
1119         m_debugInterface->m_frameType = m_pictureCodingType;
1120 
1121         DumpMbEncPakOutput(&currRefList, m_debugInterface);
1122 
1123         //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeMbEncMbPakOutput(
1124         //    m_debugInterface,
1125         //    this,
1126         //    &currRefList,
1127         //    (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1128         //    CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1129         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1130             &currRefList.resRefMbCodeBuffer,
1131             CodechalDbgAttr::attrOutput,
1132             "MbCode",
1133             m_picWidthInMb * m_frameFieldHeightInMb * 64,
1134             CodecHal_PictureIsBottomField(currRefList.RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
1135             (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1136             CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1137 
1138         if (m_mvDataSize)
1139         {
1140             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1141                 &currRefList.resRefMvDataBuffer,
1142                 CodechalDbgAttr::attrOutput,
1143                 "MbData",
1144                 m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
1145                 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
1146                 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1147                 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1148         }
1149         if (CodecHalIsFeiEncode(m_codecFunction))
1150         {
1151             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1152                 &m_resDistortionBuffer,
1153                 CodechalDbgAttr::attrOutput,
1154                 "DistortionSurf",
1155                 m_picWidthInMb * m_frameFieldHeightInMb * 48,
1156                 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
1157                 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1158                 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1159         }
1160     }
1161 #endif
1162     return eStatus;
1163 }
1164 
1165 // ------------------------------------------------------------------------------
1166 //| Purpose:    Setup Curbe for HEVC MbEnc I Kernels
1167 //| Return:     N/A
1168 //------------------------------------------------------------------------------
InitCurbeDataB()1169 MOS_STATUS CodecHalHevcMbencG12::InitCurbeDataB()
1170 {
1171     uint32_t            curIdx = m_currRecycledBufIdx;
1172     MOS_LOCK_PARAMS lockFlags;
1173     MOS_STATUS      eStatus = MOS_STATUS_SUCCESS;
1174 
1175     uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3;  // Map TU 1,4,6 to 0,1,2
1176 
1177     // Initialize the CURBE data
1178     MBENC_CURBE curbe;
1179 
1180     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
1181     {
1182         curbe.QPType = QP_TYPE_CONSTANT;
1183         curbe.ROIEnable = (m_hevcPicParams->NumROI || m_mbQpDataEnabled) ? true : false;
1184     }
1185     else
1186     {
1187         curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
1188     }
1189 
1190     // TU based settings
1191     curbe.EnableCu64Check = m_tuSettings[EnableCu64CheckTuParam][tuMapping];
1192     curbe.MaxNumIMESearchCenter = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];
1193     curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping];
1194     curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping];
1195     curbe.Dynamic64Order = m_tuSettings[Dynamic64OrderTuParam][tuMapping];
1196     curbe.DynamicOrderTh = m_tuSettings[DynamicOrderThTuParam][tuMapping];
1197     curbe.Dynamic64Enable = m_tuSettings[Dynamic64EnableTuParam][tuMapping];
1198     curbe.Dynamic64Th = m_tuSettings[Dynamic64ThTuParam][tuMapping];
1199     curbe.IncreaseExitThresh = m_tuSettings[IncreaseExitThreshTuParam][tuMapping];
1200     curbe.IntraSpotCheck = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping];
1201     curbe.Fake32Enable = m_tuSettings[Fake32EnableTuParam][tuMapping];
1202     curbe.Dynamic64Min32         = m_tuSettings[Dynamic64Min32][tuMapping];
1203 
1204     curbe.FrameWidthInSamples = m_frameWidth;
1205     curbe.FrameHeightInSamples = m_frameHeight;
1206 
1207     curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
1208     curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
1209     curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
1210     curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
1211 
1212     curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc;
1213 
1214     curbe.TUDepthControl = curbe.MaxTransformDepthInter;
1215 
1216     int32_t sliceQp   = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1217     curbe.FrameQP = abs(sliceQp);
1218     curbe.FrameQPSign = (sliceQp > 0) ? 0 : 1;
1219 
1220 #if 0 // no need in the optimized kernel because kernel does the table look-up
1221     LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp);
1222     curbe.DW4_ModeIntra32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_32X32];
1223     curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32];
1224 
1225     curbe.DW5_ModeIntra16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_16X16];
1226     curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16];
1227     curbe.DW5_ModeIntra8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_8X8];
1228     curbe.DW5_ModeIntraNonDC8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8];
1229 
1230     curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED];
1231 
1232     curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA];
1233 
1234     curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM];
1235 
1236     curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0];
1237     curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1];
1238 
1239     curbe.DW14_IntraTU4x4CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4];
1240     curbe.DW14_IntraTU8x8CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8];
1241     curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16];
1242     curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32];
1243     curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD;
1244     curbe.DW17_IntraNonDC8x8Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8];
1245     curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32];
1246 #endif
1247 
1248     curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1;
1249     curbe.NumofRowTile    = m_hevcPicParams->num_tile_rows_minus1 + 1;
1250 
1251     curbe.HMEFlag = m_hmeSupported ? 3 : 0;
1252 
1253     curbe.MaxRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
1254     curbe.MaxRefIdxL1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1;
1255     curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
1256 
1257     // Check whether Last Frame is I frame or not
1258     if (m_frameNum && m_lastPictureCodingType == I_TYPE)
1259     {
1260         // This is the flag to notify kernel not to use the history buffer
1261         curbe.LastFrameIsIntra = true;
1262     }
1263     else
1264     {
1265         curbe.LastFrameIsIntra = false;
1266     }
1267 
1268     curbe.SliceType             = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
1269     curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
1270     curbe.CollocatedFromL0Flag  = m_hevcSliceParams->collocated_from_l0_flag;
1271     curbe.theSameRefList        = m_sameRefList;
1272     curbe.IsLowDelay            = m_lowDelay;
1273     curbe.NumRefIdxL0           = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
1274     curbe.NumRefIdxL1           = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? 0 : (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1);
1275     if (m_hevcSeqParams->TargetUsage == 1)
1276     {
1277         // MaxNumMergeCand C Model uses 4 for TU1,
1278         // for quality consideration, make sure not larger than the value from App as it will be used in PAK
1279         curbe.MaxNumMergeCand   = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4);
1280     }
1281     else
1282     {
1283         // MaxNumMergeCand C Model uses 2 for TU4,7
1284         // for quality consideration, make sure not larger than the value from App as it will be used in PAK
1285        curbe.MaxNumMergeCand   = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2);
1286     }
1287 
1288     int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = { 0 }, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = { 0 };
1289     curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
1290     curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
1291     curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
1292     curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);
1293 
1294     curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
1295     curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
1296     curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
1297     curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);
1298 
1299     curbe.RefFrameWinHeight = m_frameHeight;
1300     curbe.RefFrameWinWidth = m_frameWidth;
1301 
1302     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetRoundingIntraInterToUse());
1303 
1304     curbe.RoundingInter      = (m_roundingInterInUse + 1) << 4;  // Should be an input from par in the cmodel (slice state)
1305     curbe.RoundingIntra      = (m_roundingIntraInUse + 1) << 4;  // Should be an input from par in the cmodel (slice state)
1306     curbe.RDEQuantRoundValue = (m_roundingInterInUse + 1) << 4;
1307 
1308     uint32_t gopB = m_hevcSeqParams->GopRefDist;
1309 
1310     curbe.CostScalingForRA = (m_hevcSeqParams->LowDelayMode) ? 0 : 1;
1311 
1312     // get the min distance between current pic and ref pics
1313     uint32_t minPocDist     = 255;
1314     uint32_t costTableIndex = 0;
1315 
1316     if (curbe.SliceType == CODECHAL_ENCODE_HEVC_B_SLICE)
1317     {
1318         if (curbe.CostScalingForRA == 1)
1319         {
1320             for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++)
1321             {
1322                 if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist)
1323                     minPocDist = abs(tbRefListL0[ref]);
1324             }
1325             for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++)
1326             {
1327                 if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist)
1328                     minPocDist = abs(tbRefListL1[ref]);
1329             }
1330 
1331             if (gopB == 4)
1332             {
1333                 costTableIndex = minPocDist;
1334                 if (minPocDist == 4)
1335                     costTableIndex -= 1;
1336             }
1337             if (gopB == 8)
1338             {
1339                 costTableIndex = minPocDist + 3;
1340                 if (minPocDist == 4)
1341                     costTableIndex -= 1;
1342                 if (minPocDist == 8)
1343                     costTableIndex -= 4;
1344             }
1345         }
1346     }
1347     else if (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE)
1348     {
1349         costTableIndex = 8;
1350     }
1351     else
1352     {
1353         costTableIndex = 9;
1354     }
1355 
1356     curbe.CostTableIndex = costTableIndex;
1357 
1358     // the following fields are needed by the new optimized kernel in v052417
1359     curbe.Log2ParallelMergeLevel  = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
1360     curbe.MaxIntraRdeIter = 1;
1361     curbe.CornerNeighborPixel = 0;
1362     curbe.IntraNeighborAvailFlags = 0;
1363     curbe.SubPelMode = 3; // qual-pel search
1364     curbe.InterSADMeasure = 2; // Haar transform
1365     curbe.IntraSADMeasure = 2; // Haar transform
1366     curbe.IntraPrediction = 0; // enable 32x32, 16x16, and 8x8 luma intra prediction
1367     curbe.RefIDCostMode = 1; // 0: AVC and 1: linear method
1368     curbe.TUBasedCostSetting = 0;
1369     curbe.ConcurrentGroupNum = m_numberConcurrentGroup;
1370     curbe.WaveFrontSplitVQFix = ((1 << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)) == 64) ? 1 : 0;
1371     curbe.NumofUnitInWaveFront = m_numWavefrontInOneRegion;
1372     curbe.LoadBalenceEnable = 0; // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe
1373     curbe.ThreadNumber = MOS_MIN(2, m_numberEncKernelSubThread);
1374     curbe.Pic_init_qp_B           = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1375     curbe.Pic_init_qp_P           = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1376     curbe.Pic_init_qp_I           = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1377     curbe.SuperHME = m_16xMeSupported;
1378     curbe.UltraHME = m_32xMeSupported;
1379     curbe.EnableCu64Check         = (m_hevcSeqParams->TargetUsage == 1);
1380     curbe.PerBFrameQPOffset = 0;
1381 
1382     switch (m_hevcSeqParams->TargetUsage)
1383     {
1384     case 1:
1385         curbe.Degree45 = 0;
1386         curbe.Break12Dependency = 0;
1387         curbe.DisableTemporal16and8 = 0;
1388         break;
1389     case 4:
1390         curbe.Degree45              = 1;
1391         curbe.Break12Dependency     = 1;
1392         curbe.DisableTemporal16and8 = 0;
1393         break;
1394     default:
1395         curbe.Degree45              = 1;
1396         curbe.Break12Dependency     = 1;
1397         curbe.DisableTemporal16and8 = 1;
1398         break;
1399     }
1400 
1401     curbe.WaveFrontSplitsEnable     = curbe.Degree45;  // when 45 degree, enable wave front split
1402     curbe.LongTermReferenceFlags_L0 = 0;
1403     for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++)
1404     {
1405         curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
1406     }
1407     curbe.LongTermReferenceFlags_L1 = 0;
1408     for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++)
1409     {
1410         curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
1411     }
1412 
1413     curbe.Stepping = 0;
1414     curbe.Cu64SkipCheckOnly = 0;
1415     curbe.Cu642Nx2NCheckOnly = 0;
1416     curbe.EnableCu64AmpCheck = 1;
1417     curbe.IntraSpeedMode = 0; // 35 mode
1418     curbe.DisableIntraNxN = 0;
1419 
1420 #if 0 //needed only when using A stepping on simu/emu
1421     if (m_hwInterface->GetPlatform().usRevId == 0)
1422     {
1423         curbe.Stepping = 1;
1424         curbe.TUDepthControl = 1;
1425         curbe.MaxTransformDepthInter = 1;
1426         curbe.MaxTransformDepthIntra = 0;
1427         //buf->curbe.EnableCu64Check       = 1;
1428         curbe.Cu64SkipCheckOnly = 0;
1429         curbe.Cu642Nx2NCheckOnly = 1;
1430         curbe.EnableCu64AmpCheck = 0;
1431         curbe.DisableIntraNxN = 1;
1432         curbe.MaxNumMergeCand = 1;
1433     }
1434 #endif
1435 
1436     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1437     lockFlags.WriteOnly = 1;
1438     auto buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
1439         m_osInterface,
1440         &m_encBCombinedBuffer1[curIdx].sResource,
1441         &lockFlags);
1442     CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
1443 
1444     if (curbe.Degree45)
1445     {
1446         MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
1447     }
1448     buf->Curbe = curbe;
1449 
1450     m_osInterface->pfnUnlockResource(
1451         m_osInterface,
1452         &m_encBCombinedBuffer1[curIdx].sResource);
1453 
1454     // clean-up the thread dependency buffer in the second combined buffer
1455     if (m_numberEncKernelSubThread > 1)
1456     {
1457         MOS_LOCK_PARAMS lockFlags;
1458 
1459         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1460         lockFlags.WriteOnly = 1;
1461         auto data = (uint8_t*)m_osInterface->pfnLockResource(
1462             m_osInterface,
1463             &m_encBCombinedBuffer2[curIdx].sResource,
1464             &lockFlags);
1465         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1466 
1467         MOS_ZeroMemory(&data[m_threadTaskBufferOffset], m_threadTaskBufferSize);
1468 
1469         m_osInterface->pfnUnlockResource(
1470             m_osInterface,
1471             &m_encBCombinedBuffer2[curIdx].sResource);
1472     }
1473 
1474     if (m_initEncConstTable)
1475     {
1476         // Initialize the Enc Constant Table surface
1477         if (m_isMaxLcu64)
1478         {
1479             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_constTableB->WriteSurface(
1480                 (unsigned char *)m_encLcu64ConstantDataLut,
1481                 nullptr,
1482                 sizeof(m_encLcu64ConstantDataLut)));
1483         }
1484         else
1485         {
1486             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_constTableB->WriteSurface(
1487                 (unsigned char *)m_encLcu32ConstantDataLut,
1488                 nullptr,
1489                 sizeof(m_encLcu32ConstantDataLut)));
1490         }
1491         m_initEncConstTable = false;
1492     }
1493 
1494     if (m_resolutionChanged)
1495     {
1496         m_initEncLoadBalence = true;
1497     }
1498 
1499     if (m_initEncLoadBalence)
1500     {
1501         // Initialize the Enc Constant Table surface
1502         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_loadBalance->WriteSurface((unsigned char *)m_FrameBalance, nullptr, sizeof(m_FrameBalance)));
1503 
1504         m_initEncLoadBalence = false;
1505     }
1506 
1507     return eStatus;
1508 }
1509 
SetupSurfacesB()1510 MOS_STATUS CodecHalHevcMbencG12::SetupSurfacesB()
1511 {
1512     CODECHAL_ENCODE_FUNCTION_ENTER;
1513     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1514 
1515     //Concurrent Thread Group Data
1516     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateBuffer(
1517         &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
1518         m_combinedBuffer1));
1519 
1520     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateBuffer(
1521         &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1522         m_combinedBuffer2));
1523 
1524     CODECHAL_DEBUG_TOOL(
1525         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1526             &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
1527             CodechalDbgAttr::attrOutput,
1528             "CombinedBuffer1",
1529             m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
1530             0,
1531             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1532     )
1533 
1534     CODECHAL_DEBUG_TOOL(
1535         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1536             &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1537             CodechalDbgAttr::attrOutput,
1538             "HistoryIn",
1539             sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer),
1540             sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer),
1541             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1542         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1543             &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1544             CodechalDbgAttr::attrOutput,
1545             "ThreadTask",
1546             m_threadTaskBufferSize,
1547             m_threadTaskBufferOffset,
1548             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));)
1549 
1550     PMOS_SURFACE inputSurface = m_rawSurfaceToEnc;
1551 
1552     // Cur and VME surfaces
1553     //Source Y and UV
1554     //first create the 2D cur input surface
1555     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1556         &inputSurface->OsResource,
1557         m_curSurf));
1558 
1559     CODECHAL_DEBUG_TOOL(
1560         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1561             inputSurface,
1562             CodechalDbgAttr::attrEncodeRawInputSurface,
1563             "MbEnc_Input_SrcSurf")));
1564 
1565     if (m_curVme)
1566     {
1567         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyVmeSurfaceG7_5(m_curVme));
1568         m_curVme = nullptr;
1569     }
1570 
1571     for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
1572     {
1573         int32_t ll = 0;
1574         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1575         if (!CodecHal_PictureIsInvalid(refPic) &&
1576             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1577         {
1578             int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1579 
1580             PMOS_SURFACE refSurfacePtr = nullptr;
1581             if (surface_idx == 0 && m_useWeightedSurfaceForL0)
1582             {
1583                 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx);
1584             }
1585             else
1586             {
1587                 refSurfacePtr = &m_refList[idx]->sRefBuffer;
1588             }
1589 
1590             // Picture Y VME
1591             //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1592             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1593                 &refSurfacePtr->OsResource,
1594                 m_surfRefArray[surface_idx]));
1595 
1596             CODECHAL_DEBUG_TOOL(
1597                 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1598                 std::string refSurfName      = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1599                                                "_L0" + std::to_string(surface_idx);
1600                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1601                     refSurfacePtr,
1602                     CodechalDbgAttr::attrReferenceSurfaces,
1603                     refSurfName.data())));
1604         }
1605         else
1606         {
1607             // Providing Dummy surface as per VME requirement.
1608             //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1609             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1610                 &inputSurface->OsResource,
1611                 m_surfRefArray[surface_idx]));
1612         }
1613 
1614         ll = 1;
1615         refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1616         if (!CodecHal_PictureIsInvalid(refPic) &&
1617             !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1618         {
1619             int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1620 
1621             PMOS_SURFACE refSurfacePtr = nullptr;
1622             if (surface_idx == 0 && m_useWeightedSurfaceForL1)
1623             {
1624                 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx);
1625             }
1626             else
1627             {
1628                 refSurfacePtr = &m_refList[idx]->sRefBuffer;
1629             }
1630 
1631             // Picture Y VME
1632             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1633                 &refSurfacePtr->OsResource,
1634                 m_surfRefArray[MAX_VME_BWD_REF + surface_idx]));
1635 
1636             CODECHAL_DEBUG_TOOL(
1637                 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1638                 std::string refSurfName      = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1639                                                "_L1" + std::to_string(surface_idx);
1640                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1641                     refSurfacePtr,
1642                     CodechalDbgAttr::attrEncodeRawInputSurface,
1643                     refSurfName.data())));
1644         }
1645         else
1646         {
1647             // Providing Dummy surface as per VME requirement.
1648             //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1649             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1650                 &inputSurface->OsResource,
1651                 m_surfRefArray[MAX_VME_BWD_REF + surface_idx]));
1652         }
1653     }
1654 
1655     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateVmeSurfaceG7_5(
1656         m_curSurf,
1657         &m_surfRefArray[0],
1658         &m_surfRefArray[MAX_VME_BWD_REF],
1659         MAX_VME_FWD_REF,
1660         MAX_VME_BWD_REF,
1661         m_curVme));
1662 
1663     /* WA for 16k resolution tests with P010 format. Recon surface is NV12 format with width=2*original_width
1664        32k width is not supported by MEDIA_SURFACE_STATE_CMD.
1665        We can therefore change the recon dimensions to 16k width and 32k pitch,
1666        this will cover the portion of the surface that VME uses */
1667     if (MEDIA_IS_WA(m_waTable, Wa16kWidth32kPitchNV12ReconForP010Input) && m_curVme && m_encode16KSequence && (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_chromaFormat && inputSurface->Format == Format_P010)
1668     {
1669        CM_VME_SURFACE_STATE_PARAM  vmeDimensionParam;
1670        vmeDimensionParam.width   = ENCODE_HEVC_16K_PIC_WIDTH;
1671        vmeDimensionParam.height  = ENCODE_HEVC_16K_PIC_HEIGHT;
1672        m_cmDev->SetVmeSurfaceStateParam(m_curVme, &vmeDimensionParam);
1673     }
1674 
1675     // Current Y with reconstructed boundary pixels
1676     if (!m_reconWithBoundaryPix)
1677     {
1678         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
1679             &m_currPicWithReconBoundaryPix.OsResource,
1680             m_reconWithBoundaryPix));
1681     }
1682 
1683     // PAK object command surface
1684     if (m_mbCodeBuffer)
1685     {
1686         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_mbCodeBuffer));
1687         m_mbCodeBuffer = nullptr;
1688     }
1689 
1690     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
1691         &m_resMbCodeSurface,
1692         m_mbCodeBuffer));
1693 
1694     // PAK object command surface
1695     CM_BUFFER_STATE_PARAM bufParams;
1696     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBufferAlias(
1697         m_mbCodeBuffer,
1698         m_mbCodeSurfIdx));
1699     bufParams.uiBaseAddressOffset = 0;
1700     bufParams.uiSize = m_mvOffset;
1701     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mbCodeBuffer->SetSurfaceStateParam(
1702         m_mbCodeSurfIdx,
1703         &bufParams));
1704 
1705     // CU packet for PAK surface
1706     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBufferAlias(
1707         m_mbCodeBuffer,
1708         m_mvDataSurfIdx));
1709     bufParams.uiBaseAddressOffset = m_mvOffset;
1710     bufParams.uiSize = m_mbCodeSize - m_mvOffset;
1711     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mbCodeBuffer->SetSurfaceStateParam(
1712         m_mvDataSurfIdx,
1713         &bufParams));
1714 
1715     //Software Scoreboard surface
1716     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1717         &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource,
1718         m_swScoreboardSurf));
1719 
1720     CODECHAL_DEBUG_TOOL(
1721         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1722             m_swScoreboardState->GetCurSwScoreboardSurface(),
1723             CodechalDbgAttr::attrOutput,
1724             "SBinSurface",
1725             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));)
1726 
1727     if ((!m_mbQpDataEnabled) || (m_brcEnabled))
1728     {
1729         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1730             &m_brcBuffers.sBrcMbQpBuffer.OsResource,
1731             m_cu16X16QpIn));
1732     }
1733     else
1734     {
1735         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1736             &m_mbQpDataSurface.OsResource,
1737             m_cu16X16QpIn));
1738     }
1739 
1740     // Lcu level data input
1741     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1742         &m_lcuLevelInputDataSurface[m_currRecycledBufIdx].OsResource,
1743         m_lcuLevelData));
1744 
1745     CODECHAL_DEBUG_TOOL(
1746         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1747             &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
1748             CodechalDbgAttr::attrOutput,
1749             "LcuInfoSurface",
1750             CODECHAL_MEDIA_STATE_HEVC_B_MBENC));)
1751 
1752     // Colocated CU Motion Vector Data Surface
1753     if (m_colocCumvData)
1754     {
1755         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_colocCumvData));
1756         m_colocCumvData = nullptr;
1757     }
1758 
1759     if (m_mbCodeIdxForTempMVP != 0xFF)
1760     {
1761         //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
1762         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
1763             m_trackedBuf->GetMvTemporalBuffer(m_mbCodeIdxForTempMVP),
1764             m_colocCumvData));
1765 
1766         CODECHAL_DEBUG_TOOL(
1767             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1768                 m_trackedBuf->GetMvTemporalBuffer(m_mbCodeIdxForTempMVP),
1769                 CodechalDbgAttr::attrOutput,
1770                 "CollocatedMV",
1771                 m_sizeOfMvTemporalBuffer,
1772                 0,
1773                 CODECHAL_MEDIA_STATE_HEVC_B_MBENC)););
1774     }
1775 
1776     // HME motion predictor data
1777     if (m_hmeEnabled)
1778     {
1779         m_hmeMotionPredData = m_hmeKernel->GetCmSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer);
1780     }
1781 
1782     if (m_isMaxLcu64)
1783     {
1784         PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1785 
1786         //VME 2X Inter prediction Surface for current frame
1787         //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
1788         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1789             &currScaledSurface2x->OsResource,
1790             m_curSurf2X));
1791 
1792         CODECHAL_DEBUG_TOOL(
1793             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1794                 currScaledSurface2x,
1795                 CodechalDbgAttr::attrReferenceSurfaces,
1796                 "2xScaledSurf"))
1797         );
1798 
1799         if (m_cur2XVme)
1800         {
1801             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyVmeSurfaceG7_5(m_cur2XVme));
1802             m_cur2XVme = nullptr;
1803         }
1804 
1805         // RefFrame's 2x DS surface
1806         for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
1807         {
1808             int32_t ll = 0;
1809             CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1810             if (!CodecHal_PictureIsInvalid(refPic) &&
1811                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1812             {
1813                 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1814 
1815                 // Picture Y VME
1816                 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1817 
1818                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1819                     &m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx)->OsResource,
1820                     m_surf2XArray[surface_idx]));
1821 
1822                 CODECHAL_DEBUG_TOOL(
1823                     m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1824                     std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1825                                                "_L0" + std::to_string(surface_idx);
1826                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1827                         m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
1828                         CodechalDbgAttr::attrReferenceSurfaces,
1829                         refSurfName.data())));
1830             }
1831             else
1832             {
1833                 // Providing Dummy surface as per VME requirement.
1834                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1835                     &currScaledSurface2x->OsResource,
1836                     m_surf2XArray[surface_idx]));
1837             }
1838 
1839             ll = 1;
1840             refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1841             if (!CodecHal_PictureIsInvalid(refPic) &&
1842                 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1843             {
1844                 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1845 
1846                 // Picture Y VME
1847                 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1848                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1849                     &m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx)->OsResource,
1850                     m_surf2XArray[MAX_VME_BWD_REF + surface_idx]));
1851 
1852                 CODECHAL_DEBUG_TOOL(
1853                     m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1854                     std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1855                                                "_L1" + std::to_string(surface_idx);
1856                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1857                         m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
1858                         CodechalDbgAttr::attrReferenceSurfaces,
1859                         refSurfName.data())));
1860             }
1861             else
1862             {
1863                 // Providing Dummy surface as per VME requirement.
1864                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1865                     &currScaledSurface2x->OsResource,
1866                     m_surf2XArray[MAX_VME_BWD_REF + surface_idx]));
1867             }
1868         }
1869 
1870         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateVmeSurfaceG7_5(
1871             m_curSurf2X,
1872             &m_surf2XArray[0],
1873             &m_surf2XArray[MAX_VME_BWD_REF],
1874             MAX_VME_FWD_REF,
1875             MAX_VME_BWD_REF,
1876             m_cur2XVme));
1877     }
1878 
1879     if (m_isMaxLcu64)
1880     {
1881         // Encoder History Input Buffer
1882         if (!m_histInBuffer)
1883         {
1884             //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
1885             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
1886                 &m_encoderHistoryInputBuffer.OsResource,
1887                 m_histInBuffer));
1888         }
1889 
1890         // Encoder History Input Buffer
1891         if (!m_histOutBuffer)
1892         {
1893             //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
1894             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
1895                 &m_encoderHistoryOutputBuffer.OsResource,
1896                 m_histOutBuffer));
1897         }
1898     }
1899 
1900     return eStatus;
1901 }
1902 
EncodeIntraDistKernel()1903 MOS_STATUS CodecHalHevcMbencG12::EncodeIntraDistKernel()
1904 {
1905     CodechalKernelIntraDistMdfG12::CurbeParam curbeParam;
1906     curbeParam.downScaledWidthInMb4x = m_downscaledWidthInMb4x;
1907     curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x;
1908 
1909     CodechalKernelIntraDistMdfG12::SurfaceParams surfaceParam;
1910     surfaceParam.input4xDsSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1911     surfaceParam.intraDistSurface = m_brcBuffers.brcIntraDistortionSurface;
1912     surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset;
1913     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam));
1914 
1915     return MOS_STATUS_SUCCESS;
1916 }
1917 
// TODO: remove this function once the fix in CodechalEncHevcState::GetRoundingIntraInterToUse() is checked in.
GetRoundingIntraInterToUse()1919 MOS_STATUS CodecHalHevcMbencG12::GetRoundingIntraInterToUse()
1920 {
1921     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1922 
1923     CODECHAL_ENCODE_FUNCTION_ENTER;
1924 
1925     if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingIntra)
1926     {
1927         m_roundingIntraInUse = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetIntra;
1928     }
1929     else
1930     {
1931         if (m_hevcPicParams->CodingType == I_TYPE)
1932         {
1933             m_roundingIntraInUse = 10;
1934         }
1935         else if (m_HierchGopBRCEnabled)
1936         {
1937             //Hierachical B GOP
1938             if (m_hevcPicParams->CodingType == P_TYPE)
1939             {
1940                 m_roundingIntraInUse = 4;
1941             }
1942             else if (m_hevcPicParams->CodingType == B_TYPE)
1943             {
1944                 m_roundingIntraInUse = 3;
1945                 if (m_lowDelay && !m_hevcSeqParams->LowDelayMode)
1946                 {
1947                     // RAB test, anchor frame
1948                     m_roundingIntraInUse = 4;
1949                 }
1950             }
1951             else
1952             {
1953                 m_roundingIntraInUse = 2;
1954             }
1955         }
1956         else
1957         {
1958             m_roundingIntraInUse = 10;
1959         }
1960     }
1961 
1962     if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingInter)
1963     {
1964         m_roundingInterInUse = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetInter;
1965     }
1966     else
1967     {
1968         if (m_HierchGopBRCEnabled)
1969         {
1970             //Hierachical B GOP
1971             if (m_hevcPicParams->CodingType == I_TYPE ||
1972                 m_hevcPicParams->CodingType == P_TYPE)
1973             {
1974                 m_roundingInterInUse = 4;
1975             }
1976             else if (m_hevcPicParams->CodingType == B_TYPE)
1977             {
1978                 m_roundingInterInUse = 3;
1979                 if (m_lowDelay && !m_hevcSeqParams->LowDelayMode)
1980                 {
1981                     // RAB test, anchor frame
1982                     m_roundingInterInUse = 4;
1983                 }
1984             }
1985             else
1986             {
1987                 m_roundingInterInUse = 2;
1988             }
1989         }
1990         else
1991         {
1992             m_roundingInterInUse = 4;
1993         }
1994     }
1995 
1996     CODECHAL_ENCODE_VERBOSEMESSAGE("Rounding intra in use:%d, rounding inter in use:%d.\n", m_roundingIntraInUse, m_roundingInterInUse);
1997 
1998     return eStatus;
1999 }
2000