1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_hevc_g12.cpp
24 //! \brief HEVC dual-pipe encoder for GEN12.
25 //!
26
27 #include "codechal_encode_hevc_g12.h"
28 #include "codechal_encode_csc_ds_g12.h"
29 #include "codechal_mmc_encode_hevc_g12.h"
30 #include "codechal_encode_wp_g12.h"
31 #include "codechal_kernel_header_g12.h"
32 #include "codechal_kernel_hme_g12.h"
33 #include "codechal_debug.h"
34 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
35 #include "igcodeckrn_g12.h"
36 #endif
37 #include "codeckrnheader.h"
38 #include "mhw_vdbox_hcp_g12_X.h"
39 #include "mhw_vdbox_g12_X.h"
40 #include "mhw_mi_g12_X.h"
41 #include "mhw_render_g12_X.h"
42 #include "cm_queue_rt.h"
43 #include "codechal_debug.h"
44
//! \cond SKIP_DOXYGEN
// CRECOST: multiplies lambda by the mode-bit entry and its scale factor, converts the
// product to uint32_t and passes it to Map44LutValue together with 0x8F.
#define CRECOST(lambda, mode, lcu, slice)                                                                        \
    (Map44LutValue(                                                                                              \
        (uint32_t)((lambda) * (m_modeBits[(lcu)][(mode)][(slice)]) * (m_modeBitsScale[(mode)][(slice)])),       \
        0x8F))
// RDEBITS62: multiplies the mode-bit entry by its scale factor, converts the product to
// float and passes it to GetU62ModeBits.
#define RDEBITS62(mode, lcu, slice)                                                                              \
    (GetU62ModeBits(                                                                                             \
        (float)((m_modeBits[(lcu)][(mode)][(slice)]) * (m_modeBitsScale[(mode)][(slice)]))))
//! \endcond
49
SetGpuCtxCreatOption()50 MOS_STATUS CodechalEncHevcStateG12::SetGpuCtxCreatOption()
51 {
52 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
53
54 CODECHAL_ENCODE_FUNCTION_ENTER;
55
56 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
57 {
58 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
59 }
60 else
61 {
62 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
63 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
64 }
65
66 return eStatus;
67 }
68
AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER * cmdBuffer)69 MOS_STATUS CodechalEncHevcStateG12::AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER *cmdBuffer)
70 {
71 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
72
73 // call MI_VD_CONTROL_STATE before HCP_PIPE_SELECT to init the pipe.
74 {
75 MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams;
76 //set up VD_CONTROL_STATE command
77 {
78 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
79 vdControlStateParams.initialization = true;
80 CODECHAL_ENCODE_CHK_STATUS_RETURN(
81 static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(cmdBuffer, &vdControlStateParams));
82 }
83 }
84
85 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams;
86 SetHcpPipeModeSelectParams(pipeModeSelectParams);
87 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
88
89 return eStatus;
90 }
91
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)92 void CodechalEncHevcStateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS &vdboxPipeModeSelectParams)
93 {
94 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 &pipeModeSelectParams =
95 static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 &>(vdboxPipeModeSelectParams);
96 pipeModeSelectParams = {};
97 CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
98
99 pipeModeSelectParams.pakPiplnStrmoutEnabled = m_pakPiplStrmOutEnable;
100 pipeModeSelectParams.pakFrmLvlStrmoutEnable = (m_brcEnabled && m_numPipe > 1);
101
102 if (m_numPipe > 1)
103 {
104 // Running in the multiple VDBOX mode
105 if (IsFirstPipe())
106 {
107 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
108 }
109 else if (IsLastPipe())
110 {
111 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
112 }
113 else
114 {
115 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
116 }
117 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
118 }
119 else
120 {
121 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
122 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
123 }
124 }
125
SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE & picStateParams)126 void CodechalEncHevcStateG12::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE &picStateParams)
127 {
128 CODECHAL_ENCODE_FUNCTION_ENTER;
129
130 CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams);
131 picStateParams.sseEnabledInVmeEncode = m_sseEnabled;
132 }
133
AddHcpSurfaceStateCmds(MOS_COMMAND_BUFFER * cmdBuffer)134 MOS_STATUS CodechalEncHevcStateG12::AddHcpSurfaceStateCmds(MOS_COMMAND_BUFFER *cmdBuffer)
135 {
136 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
137
138 MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
139 SetHcpSrcSurfaceParams(srcSurfaceParams);
140 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &srcSurfaceParams));
141
142 MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
143 SetHcpReconSurfaceParams(reconSurfaceParams);
144 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &reconSurfaceParams));
145
146 // Add the surface state for reference picture, GEN12 HW change
147 MHW_VDBOX_SURFACE_PARAMS refSurfaceParams;
148 SetHcpRefSurfaceParams(refSurfaceParams);
149
150 if (m_mmcState->IsMmcEnabled())
151 {
152 refSurfaceParams.refsMmcEnable = 0;
153 refSurfaceParams.refsMmcType = 0;
154 refSurfaceParams.dwCompressionFormat = 0;
155
156 //add for B frame support
157 if (m_pictureCodingType != I_TYPE)
158 {
159 for (uint8_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
160 {
161 if (i < CODEC_MAX_NUM_REF_FRAME_HEVC &&
162 m_picIdx[i].bValid && m_currUsedRefPic[i])
163 {
164 uint8_t idx = m_picIdx[i].ucPicIdx;
165 uint8_t frameStoreId = m_refIdxMapping[i];
166
167 MOS_MEMCOMP_STATE mmcState = MOS_MEMCOMP_DISABLED;
168 ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &mmcState));
169 refSurfaceParams.refsMmcEnable |= (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC) ? (1 << frameStoreId) : 0;
170 refSurfaceParams.refsMmcType |= (mmcState == MOS_MEMCOMP_RC) ? (1 << frameStoreId) : 0;
171 if (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC)
172 {
173 ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcFormat(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &refSurfaceParams.dwCompressionFormat));
174 }
175 }
176 }
177 }
178 }
179
180 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &refSurfaceParams));
181
182 return eStatus;
183 }
184
AddHcpPictureStateCmd(MOS_COMMAND_BUFFER * cmdBuffer)185 MOS_STATUS CodechalEncHevcStateG12::AddHcpPictureStateCmd(MOS_COMMAND_BUFFER *cmdBuffer)
186 {
187 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
188
189 MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
190
191 SetHcpPicStateParams(picStateParams);
192
193 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(cmdBuffer, &picStateParams));
194
195 return eStatus;
196 }
197
UpdateYUY2SurfaceInfo(MOS_SURFACE & surface,bool is10Bit)198 MOS_STATUS CodechalEncHevcStateG12::UpdateYUY2SurfaceInfo(
199 MOS_SURFACE &surface,
200 bool is10Bit)
201 {
202 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
203
204 CODECHAL_ENCODE_FUNCTION_ENTER;
205
206 if (surface.Format == Format_YUY2V)
207 {
208 // surface has been updated
209 return eStatus;
210 }
211
212 if (surface.Format != Format_YUY2 &&
213 surface.Format != Format_Y210 &&
214 surface.Format != Format_Y216)
215 {
216 eStatus = MOS_STATUS_INVALID_PARAMETER;
217 return eStatus;
218 }
219
220 if (surface.dwWidth < m_oriFrameWidth / 2 || surface.dwHeight < m_oriFrameHeight * 2)
221 {
222 eStatus = MOS_STATUS_INVALID_PARAMETER;
223 return eStatus;
224 }
225
226 surface.Format = is10Bit ? Format_Y216V : Format_YUY2V;
227 surface.dwWidth = m_oriFrameWidth;
228 surface.dwHeight = m_oriFrameHeight;
229
230 surface.YPlaneOffset.iSurfaceOffset = 0;
231 surface.YPlaneOffset.iXOffset = 0;
232 surface.YPlaneOffset.iYOffset = 0;
233
234 surface.UPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
235 surface.UPlaneOffset.iXOffset = 0;
236 surface.UPlaneOffset.iYOffset = surface.dwHeight;
237
238 surface.VPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
239 surface.VPlaneOffset.iXOffset = 0;
240 surface.VPlaneOffset.iYOffset = surface.dwHeight;
241
242 return eStatus;
243 }
244
InitializePicture(const EncoderParams & params)245 MOS_STATUS CodechalEncHevcStateG12::InitializePicture(const EncoderParams ¶ms)
246 {
247 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
248
249 CODECHAL_ENCODE_FUNCTION_ENTER;
250
251 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::InitializePicture(params));
252
253 if (m_resolutionChanged)
254 {
255 ResizeBufferOffset();
256 }
257
258 m_sseEnabled = false;
259 // only 420 format support SSE output
260 // see TDR in scalability case, disable SSE for now before HW confirm the capability.
261 if (m_sseSupported &&
262 m_hevcSeqParams->chroma_format_idc == HCP_CHROMA_FORMAT_YUV420 &&
263 m_numPipe == 1)
264 {
265 m_sseEnabled = true;
266 }
267
268 // for HEVC VME, HUC based WP is not supported.
269 m_hevcPicParams->bEnableGPUWeightedPrediction = false;
270
271 m_pakPiplStrmOutEnable = m_sseEnabled || (m_brcEnabled && m_numPipe > 1);
272
273 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams, params.dwBitstreamSize));
274 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
275 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResourcesVariableSize());
276
277 return eStatus;
278 }
279
SetPictureStructs()280 MOS_STATUS CodechalEncHevcStateG12::SetPictureStructs()
281 {
282 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
283
284 CODECHAL_ENCODE_FUNCTION_ENTER;
285
286 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs());
287
288 if (m_minMaxQpControlEnabled)
289 {
290 //if Min Max QP is on disable Frame Panic Mode
291 m_enableFramePanicMode = false;
292 }
293
294 // This is an additional (the 5th) PAK pass for BRC panic mode. Enabled for the single pipe case only.
295 // Panic mode is not supported with Min/Max QP
296 if (m_brcEnabled && m_enableFramePanicMode && (false == m_hevcSeqParams->DisableHRDConformance) &&
297 (I_TYPE != m_hevcPicParams->CodingType) &&
298 (m_numPipe == 1))
299 {
300 m_numPasses++;
301 }
302
303 m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
304
305 if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
306 (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
307 {
308 uint8_t currRefIdx = m_hevcPicParams->CurrReconstructedPic.FrameIdx;
309 UpdateYUY2SurfaceInfo(m_refList[currRefIdx]->sRefBuffer, m_is10BitHevc);
310
311 if (m_pictureCodingType != I_TYPE)
312 {
313 for (uint32_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
314 {
315 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
316 {
317 continue;
318 }
319 uint8_t picIdx = m_picIdx[i].ucPicIdx;
320 CODECHAL_ENCODE_ASSERT(picIdx < 127);
321
322 UpdateYUY2SurfaceInfo((m_refList[picIdx]->sRefBuffer), m_is10BitHevc);
323 }
324 }
325 }
326
327 return eStatus;
328 }
329
SetKernelParams(EncOperation encOperation,MHW_KERNEL_PARAM * kernelParams,uint32_t idx)330 MOS_STATUS CodechalEncHevcStateG12::SetKernelParams(
331 EncOperation encOperation,
332 MHW_KERNEL_PARAM *kernelParams,
333 uint32_t idx)
334 {
335 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
336
337 kernelParams->iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
338 kernelParams->iIdCount = 1;
339
340 uint32_t curbeAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
341 switch (encOperation)
342 {
343 case ENC_MBENC:
344 {
345 switch (idx)
346 {
347 case MBENC_LCU32_KRNIDX:
348 kernelParams->iBTCount = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
349 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU32_BTI), (size_t)curbeAlignment);
350 kernelParams->iBlockWidth = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
351 kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
352 break;
353
354 case MBENC_LCU64_KRNIDX:
355 kernelParams->iBTCount = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
356 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU64_BTI), (size_t)curbeAlignment);
357 kernelParams->iBlockWidth = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
358 kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
359 break;
360
361 default:
362 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
363 return MOS_STATUS_INVALID_PARAMETER;
364 }
365 }
366 break;
367
368 case ENC_BRC:
369 {
370 switch (idx)
371 {
372 case CODECHAL_HEVC_BRC_INIT:
373 case CODECHAL_HEVC_BRC_RESET:
374 kernelParams->iBTCount = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
375 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRC_INITRESET_CURBE), (size_t)curbeAlignment);
376 kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
377 kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
378 break;
379
380 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
381 kernelParams->iBTCount = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
382 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
383 kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
384 kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
385 break;
386
387 case CODECHAL_HEVC_BRC_LCU_UPDATE:
388 kernelParams->iBTCount = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
389 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
390 kernelParams->iBlockWidth = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
391 kernelParams->iBlockHeight = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
392 break;
393
394 default:
395 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
396 return MOS_STATUS_INVALID_PARAMETER;
397 }
398 }
399 break;
400
401 default:
402 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
403 return MOS_STATUS_INVALID_PARAMETER;
404 }
405
406 return eStatus;
407 }
408
SetBindingTable(EncOperation encOperation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,uint32_t idx)409 MOS_STATUS CodechalEncHevcStateG12::SetBindingTable(
410 EncOperation encOperation,
411 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,
412 uint32_t idx)
413 {
414 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
415
416 CODECHAL_ENCODE_CHK_NULL_RETURN(hevcEncBindingTable);
417
418 MOS_ZeroMemory(hevcEncBindingTable, sizeof(*hevcEncBindingTable));
419
420 switch (encOperation)
421 {
422 case ENC_MBENC:
423 {
424 switch (idx)
425 {
426 case MBENC_LCU32_KRNIDX:
427 case MBENC_LCU64_KRNIDX:
428 hevcEncBindingTable->dwNumBindingTableEntries = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
429 hevcEncBindingTable->dwBindingTableStartOffset = MBENC_B_FRAME_BEGIN;
430 break;
431
432 default:
433 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
434 return MOS_STATUS_INVALID_PARAMETER;
435 }
436 }
437 break;
438
439 case ENC_BRC:
440 {
441 switch (idx)
442 {
443 case CODECHAL_HEVC_BRC_INIT:
444 hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
445 hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
446 break;
447
448 case CODECHAL_HEVC_BRC_RESET:
449 hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
450 hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
451 break;
452
453 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
454 hevcEncBindingTable->dwNumBindingTableEntries = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
455 hevcEncBindingTable->dwBindingTableStartOffset = BRC_UPDATE_BEGIN;
456 break;
457
458 case CODECHAL_HEVC_BRC_LCU_UPDATE:
459 hevcEncBindingTable->dwNumBindingTableEntries = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
460 hevcEncBindingTable->dwBindingTableStartOffset = BRC_LCU_UPDATE_BEGIN;
461 break;
462
463 default:
464 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
465 return MOS_STATUS_INVALID_PARAMETER;
466 }
467 }
468 break;
469
470 default:
471 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
472 return MOS_STATUS_INVALID_PARAMETER;
473 }
474
475 for (uint32_t i = 0; i < hevcEncBindingTable->dwNumBindingTableEntries; i++)
476 {
477 hevcEncBindingTable->dwBindingTableEntries[i] = i;
478 }
479
480 return eStatus;
481 }
482
AllocateEncResources()483 MOS_STATUS CodechalEncHevcStateG12::AllocateEncResources()
484 {
485 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
486
487 CODECHAL_ENCODE_FUNCTION_ENTER;
488
489 // Surfaces used by I & B Kernels
490 uint32_t width = 0, height = 0;
491 uint32_t size = 0;
492
493 MEDIA_WA_TABLE* waTable = m_osInterface->pfnGetWaTable(m_osInterface);
494 uint32_t memType = (MEDIA_IS_WA(waTable, WaForceAllocateLML4)) ? MOS_MEMPOOL_DEVICEMEMORY : 0;
495
496 if (!m_useMdf)
497 {
498 // Intermediate CU Record surface
499 if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu32.OsResource))
500 {
501 width = m_widthAlignedLcu32;
502 height = m_heightAlignedLcu32 >> 1;
503
504 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
505 &m_intermediateCuRecordSurfaceLcu32,
506 width,
507 height,
508 "Intermediate CU record surface",
509 MOS_TILE_Y));
510 }
511
512 // Scratch Surface for I-kernel
513 if (Mos_ResourceIsNull(&m_scratchSurface.OsResource))
514 {
515 width = m_widthAlignedLcu32 >> 3;
516 height = m_heightAlignedLcu32 >> 5;
517
518 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
519 &m_scratchSurface,
520 width,
521 height,
522 "Scratch surface for I and B Kernels"));
523 }
524
525 // CU based QP surface
526 if (Mos_ResourceIsNull(&m_16x16QpInputData.OsResource))
527 {
528 width = MOS_ALIGN_CEIL(m_picWidthInMb, 64);
529 height = MOS_ALIGN_CEIL(m_picHeightInMb, 64);
530
531 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
532 &m_16x16QpInputData,
533 width,
534 height,
535 "16x16 QP Data Input surface"));
536 }
537
538 // Surfaces used by B Kernels
539 // Enc constant table for B LCU32
540 if (Mos_ResourceIsNull(&m_encConstantTableForB.sResource))
541 {
542 size = m_encConstantDataLutSize;
543
544 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
545 &m_encConstantTableForB,
546 size,
547 "Enc Constant Table surface For LCU32/LCU64"));
548 }
549
550 //Debug surface
551 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
552 {
553 if (Mos_ResourceIsNull(&m_debugSurface[i].sResource))
554 {
555 size = m_debugSurfaceSize;
556
557 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
558 &m_debugSurface[i],
559 size,
560 "Kernel debug surface"));
561 }
562 }
563 }
564
565 // LCU Level Input Data
566 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
567 {
568 if (Mos_ResourceIsNull(&m_lcuLevelInputDataSurface[i].OsResource))
569 {
570 width = 16 * ((m_widthAlignedMaxLcu >> 6) << 1);
571 height = ((m_heightAlignedMaxLcu >> 6) << 1);
572
573 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
574 &m_lcuLevelInputDataSurface[i],
575 width,
576 height,
577 "Lcu Level Data Input surface",
578 MOS_TILE_LINEAR));
579 }
580 }
581
582 m_brcInputForEncKernelBuffer = nullptr;
583
584 //Current Picture Y with Reconstructed boundary pixels
585 if (Mos_ResourceIsNull(&m_currPicWithReconBoundaryPix.OsResource))
586 {
587 width = m_widthAlignedLcu32;
588 height = m_heightAlignedLcu32;
589
590 if (m_isMaxLcu64)
591 {
592 width = m_widthAlignedMaxLcu;
593 height = m_heightAlignedMaxLcu;
594 }
595
596 uint32_t aligned_height = (uint32_t) (height * m_alignReconFactor);
597 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
598 &m_currPicWithReconBoundaryPix,
599 width,
600 aligned_height,
601 "Current Picture Y with Reconstructed Boundary Pixels surface",
602 memType));
603 }
604
605 // Encoder History Input Surface
606 if (Mos_ResourceIsNull(&m_encoderHistoryInputBuffer.OsResource))
607 {
608 width = 32 * ((m_widthAlignedMaxLcu >> 6) << 1);
609 height = ((m_heightAlignedMaxLcu >> 6) << 1);
610
611 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
612 &m_encoderHistoryInputBuffer,
613 width,
614 height,
615 "Encoder History Input surface"));
616 }
617
618 // Encoder History Output Surface
619 if (Mos_ResourceIsNull(&m_encoderHistoryOutputBuffer.OsResource))
620 {
621 width = 32 * ((m_widthAlignedMaxLcu >> 6) << 1);
622 height = ((m_heightAlignedMaxLcu >> 6) << 1);
623
624 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
625 &m_encoderHistoryOutputBuffer,
626 width,
627 height,
628 "Encoder History Output surface"));
629 }
630
631 if (m_hmeSupported && !m_useMdf)
632 {
633 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
634 // BRC Distortion surface
635 if (Mos_ResourceIsNull(&m_brcBuffers.sMeBrcDistortionBuffer.OsResource))
636 {
637 width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
638 height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
639
640 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
641 &m_brcBuffers.sMeBrcDistortionBuffer,
642 width,
643 height,
644 "Brc Distortion surface Buffer"));
645 }
646
647 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
648 }
649
650 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
651 {
652 if (Mos_ResourceIsNull(&m_encBCombinedBuffer1[i].sResource))
653 {
654 size = sizeof(MBENC_COMBINED_BUFFER1);
655
656 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
657 &m_encBCombinedBuffer1[i],
658 size,
659 "Enc B combined buffer1"));
660
661 MOS_LOCK_PARAMS lockFlags;
662 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
663 lockFlags.WriteOnly = 1;
664 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
665 m_osInterface,
666 &m_encBCombinedBuffer1[i].sResource,
667 &lockFlags);
668 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
669
670 MOS_ZeroMemory(data, size);
671
672 m_osInterface->pfnUnlockResource(
673 m_osInterface,
674 &m_encBCombinedBuffer1[i].sResource);
675 }
676 }
677
678 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
679 {
680 if (Mos_ResourceIsNull(&m_encBCombinedBuffer2[i].sResource))
681 {
682 uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
683 MBENC_COMBINED_BUFFER2 fixedBuf;
684
685 m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
686 m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
687
688 size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize;
689
690 m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
691 m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
692
693 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
694 &m_encBCombinedBuffer2[i],
695 size,
696 "Enc B combined buffer2"));
697
698 MOS_LOCK_PARAMS lockFlags;
699 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
700 lockFlags.WriteOnly = 1;
701 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
702 m_osInterface,
703 &m_encBCombinedBuffer2[i].sResource,
704 &lockFlags);
705 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
706
707 MOS_ZeroMemory(data, size);
708
709 m_osInterface->pfnUnlockResource(
710 m_osInterface,
711 &m_encBCombinedBuffer2[i].sResource);
712 }
713 }
714
715 return eStatus;
716 }
717
FreeEncResources()718 MOS_STATUS CodechalEncHevcStateG12::FreeEncResources()
719 {
720 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
721
722 CODECHAL_ENCODE_FUNCTION_ENTER;
723
724 MOS_DeleteArray(m_mbEncKernelStates);
725 m_mbEncKernelStates = nullptr;
726 MOS_FreeMemory(m_mbEncKernelBindingTable);
727 m_mbEncKernelBindingTable = nullptr;
728
729 MOS_DeleteArray(m_brcKernelStates);
730 m_brcKernelStates = nullptr;
731 MOS_FreeMemory(m_brcKernelBindingTable);
732 m_brcKernelBindingTable = nullptr;
733
734 HmeParams hmeParams;
735 MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
736 hmeParams.presMvAndDistortionSumSurface = &m_mvAndDistortionSumSurface.sResource;
737 CODECHAL_ENCODE_CHK_STATUS_RETURN(DestroyMEResources(&hmeParams));
738
739 // Surfaces used by I kernel
740 // Release Intermediate CU Record Surface
741 m_osInterface->pfnFreeResource(
742 m_osInterface,
743 &m_intermediateCuRecordSurfaceLcu32.OsResource);
744
745 // Release Scratch Surface for I-kernel
746 m_osInterface->pfnFreeResource(
747 m_osInterface,
748 &m_scratchSurface.OsResource);
749
750 // Release CU based QP surface
751 m_osInterface->pfnFreeResource(
752 m_osInterface,
753 &m_16x16QpInputData.OsResource);
754
755 // Release LCU Level Input Data
756 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
757 {
758 m_osInterface->pfnFreeResource(
759 m_osInterface,
760 &m_lcuLevelInputDataSurface[i].OsResource);
761 }
762
763 // Release Current Picture Y with Reconstructed boundary pixels surface
764 m_osInterface->pfnFreeResource(
765 m_osInterface,
766 &m_currPicWithReconBoundaryPix.OsResource);
767
768 // Release Encoder History Input Data
769 m_osInterface->pfnFreeResource(
770 m_osInterface,
771 &m_encoderHistoryInputBuffer.OsResource);
772
773 // Release Encoder History Output Data
774 m_osInterface->pfnFreeResource(
775 m_osInterface,
776 &m_encoderHistoryOutputBuffer.OsResource);
777
778 // Release Debug surface
779 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
780 {
781 m_osInterface->pfnFreeResource(
782 m_osInterface,
783 &m_debugSurface[i].sResource);
784 }
785
786 // Surfaces used by B Kernels
787 // Enc constant table for B LCU32
788 m_osInterface->pfnFreeResource(
789 m_osInterface,
790 &m_encConstantTableForB.sResource);
791
792 CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources());
793
794 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
795 {
796 m_osInterface->pfnFreeResource(
797 m_osInterface,
798 &m_encBCombinedBuffer1[i].sResource);
799 }
800
801 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
802 {
803 m_osInterface->pfnFreeResource(
804 m_osInterface,
805 &m_encBCombinedBuffer2[i].sResource);
806 }
807
808 if (m_swScoreboard)
809 {
810 MOS_FreeMemory(m_swScoreboard);
811 m_swScoreboard = nullptr;
812 }
813
814 if (m_numDelay)
815 {
816 m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
817 }
818
819 return eStatus;
820 }
821
AllocateMeResources()822 MOS_STATUS CodechalEncHevcStateG12::AllocateMeResources()
823 {
824 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
825
826 CODECHAL_ENCODE_FUNCTION_ENTER;
827
828 // Mv and Distortion Summation Surface
829 if (Mos_ResourceIsNull(&m_mvAndDistortionSumSurface.sResource))
830 {
831 uint32_t size = m_mvdistSummationSurfSize;
832
833 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
834 &m_mvAndDistortionSumSurface,
835 size,
836 "Mv and Distortion Summation surface"));
837
838 // Initialize the surface to zero for now till HME is updated to output the data into this surface
839 MOS_LOCK_PARAMS lockFlags;
840 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
841 lockFlags.WriteOnly = 1;
842 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
843 m_osInterface,
844 &m_mvAndDistortionSumSurface.sResource,
845 &lockFlags);
846 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
847
848 MOS_ZeroMemory(data, size);
849
850 m_osInterface->pfnUnlockResource(
851 m_osInterface,
852 &m_mvAndDistortionSumSurface.sResource);
853 }
854
855 return eStatus;
856 }
857
FreeMeResources()858 MOS_STATUS CodechalEncHevcStateG12::FreeMeResources()
859 {
860 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
861
862 CODECHAL_ENCODE_FUNCTION_ENTER;
863
864 m_osInterface->pfnFreeResource(
865 m_osInterface,
866 &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);
867
868 return eStatus;
869 }
870
AllocatePakResources()871 MOS_STATUS CodechalEncHevcStateG12::AllocatePakResources()
872 {
873 CODECHAL_ENCODE_FUNCTION_ENTER;
874
875 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
876
877 uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6) * ((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
878 uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5) * ((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
879 m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
880
881 const uint32_t minLcuSize = 16;
882 const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, minLcuSize); //assume smallest LCU to get max width
883 const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, minLcuSize); //assume smallest LCU to get max height
884
885 MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
886 MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
887 hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
888 hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
889 // We should move the buffer allocation to picture level if the size is dependent on LCU size
890 hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
891 hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
892 hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
893
894 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
895 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
896 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
897 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
898 allocParamsForBufferLinear.Format = Format_Buffer;
899
900 // Deblocking Filter Row Store Scratch data surface
901 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
902 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
903 &hcpBufSizeParam);
904
905 if (eStatus != MOS_STATUS_SUCCESS)
906 {
907 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
908 return eStatus;
909 }
910
911 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
912 allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
913
914 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
915 m_osInterface,
916 &allocParamsForBufferLinear,
917 &m_resDeblockingFilterRowStoreScratchBuffer);
918
919 if (eStatus != MOS_STATUS_SUCCESS)
920 {
921 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
922 return eStatus;
923 }
924
925 // Deblocking Filter Tile Row Store Scratch data surface
926 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
927 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
928 &hcpBufSizeParam);
929
930 if (eStatus != MOS_STATUS_SUCCESS)
931 {
932 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
933 return eStatus;
934 }
935
936 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
937 allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
938
939 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
940 m_osInterface,
941 &allocParamsForBufferLinear,
942 &m_resDeblockingFilterTileRowStoreScratchBuffer);
943
944 if (eStatus != MOS_STATUS_SUCCESS)
945 {
946 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
947 return eStatus;
948 }
949
950 // Deblocking Filter Column Row Store Scratch data surface
951 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
952 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
953 &hcpBufSizeParam);
954
955 if (eStatus != MOS_STATUS_SUCCESS)
956 {
957 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
958 return eStatus;
959 }
960
961 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
962 allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
963
964 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
965 m_osInterface,
966 &allocParamsForBufferLinear,
967 &m_resDeblockingFilterColumnRowStoreScratchBuffer);
968
969 if (eStatus != MOS_STATUS_SUCCESS)
970 {
971 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
972 return eStatus;
973 }
974
975 // Metadata Line buffer
976 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
977 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
978 &hcpBufSizeParam);
979
980 if (eStatus != MOS_STATUS_SUCCESS)
981 {
982 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
983 return eStatus;
984 }
985
986 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
987 allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
988
989 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
990 m_osInterface,
991 &allocParamsForBufferLinear,
992 &m_resMetadataLineBuffer);
993
994 if (eStatus != MOS_STATUS_SUCCESS)
995 {
996 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
997 return eStatus;
998 }
999
1000 // Metadata Tile Line buffer
1001 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1002 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
1003 &hcpBufSizeParam);
1004
1005 if (eStatus != MOS_STATUS_SUCCESS)
1006 {
1007 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
1008 return eStatus;
1009 }
1010
1011 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
1012 allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
1013
1014 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1015 m_osInterface,
1016 &allocParamsForBufferLinear,
1017 &m_resMetadataTileLineBuffer);
1018
1019 if (eStatus != MOS_STATUS_SUCCESS)
1020 {
1021 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
1022 return eStatus;
1023 }
1024
1025 // Metadata Tile Column buffer
1026 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1027 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
1028 &hcpBufSizeParam);
1029
1030 if (eStatus != MOS_STATUS_SUCCESS)
1031 {
1032 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
1033 return eStatus;
1034 }
1035
1036 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
1037 allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
1038
1039 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1040 m_osInterface,
1041 &allocParamsForBufferLinear,
1042 &m_resMetadataTileColumnBuffer);
1043
1044 if (eStatus != MOS_STATUS_SUCCESS)
1045 {
1046 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
1047 return eStatus;
1048 }
1049
1050 // SAO Line buffer
1051 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1052 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
1053 &hcpBufSizeParam);
1054
1055 if (eStatus != MOS_STATUS_SUCCESS)
1056 {
1057 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
1058 return eStatus;
1059 }
1060
1061 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
1062 allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
1063
1064 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1065 m_osInterface,
1066 &allocParamsForBufferLinear,
1067 &m_resSaoLineBuffer);
1068
1069 if (eStatus != MOS_STATUS_SUCCESS)
1070 {
1071 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
1072 return eStatus;
1073 }
1074
1075 // SAO Tile Line buffer
1076 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1077 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
1078 &hcpBufSizeParam);
1079
1080 if (eStatus != MOS_STATUS_SUCCESS)
1081 {
1082 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
1083 return eStatus;
1084 }
1085
1086 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
1087 allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
1088
1089 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1090 m_osInterface,
1091 &allocParamsForBufferLinear,
1092 &m_resSaoTileLineBuffer);
1093
1094 if (eStatus != MOS_STATUS_SUCCESS)
1095 {
1096 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
1097 return eStatus;
1098 }
1099
1100 // SAO Tile Column buffer
1101 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
1102 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
1103 &hcpBufSizeParam);
1104
1105 if (eStatus != MOS_STATUS_SUCCESS)
1106 {
1107 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
1108 return eStatus;
1109 }
1110
1111 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
1112 allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
1113
1114 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1115 m_osInterface,
1116 &allocParamsForBufferLinear,
1117 &m_resSaoTileColumnBuffer);
1118
1119 if (eStatus != MOS_STATUS_SUCCESS)
1120 {
1121 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
1122 return eStatus;
1123 }
1124
1125 // Lcu ILDB StreamOut buffer
1126 // Allocate the buffer size
1127 // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
1128 allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
1129 allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
1130
1131 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1132 m_osInterface,
1133 &allocParamsForBufferLinear,
1134 &m_resLcuIldbStreamOutBuffer);
1135
1136 if (eStatus != MOS_STATUS_SUCCESS)
1137 {
1138 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
1139 return eStatus;
1140 }
1141
1142 // Lcu Base Address buffer
1143 // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
1144 // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
1145 // Align to page for HUC requirement
1146 uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
1147 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1148 allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
1149
1150 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1151 m_osInterface,
1152 &allocParamsForBufferLinear,
1153 &m_resLcuBaseAddressBuffer);
1154
1155 if (eStatus != MOS_STATUS_SUCCESS)
1156 {
1157 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
1158 return eStatus;
1159 }
1160
1161 // SAO StreamOut buffer
1162 // size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * 16
1163 uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
1164 //extra added size to cover tile enabled case, per tile width aligned to 4. 20: max tile column No.
1165 size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
1166 allocParamsForBufferLinear.dwBytes = size;
1167 allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
1168
1169 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1170 m_osInterface,
1171 &allocParamsForBufferLinear,
1172 &m_resSaoStreamOutBuffer);
1173
1174 if (eStatus != MOS_STATUS_SUCCESS)
1175 {
1176 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
1177 return eStatus;
1178 }
1179
1180 uint32_t maxTileNumber = (MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE) *
1181 (MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE);
1182
1183 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1184 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1185 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1186 allocParamsForBufferLinear.Format = Format_Buffer;
1187
1188 // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP pipe buffer address command
1189 allocParamsForBufferLinear.dwBytes = m_sizeOfHcpPakFrameStats * maxTileNumber; //Each tile has 8 cache size bytes of data
1190 allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
1191
1192 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1193 m_osInterface,
1194 &allocParamsForBufferLinear,
1195 &m_resFrameStatStreamOutBuffer));
1196
1197 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
1198 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1199 uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
1200 uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
1201 size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
1202 allocParamsForBufferLinear.dwBytes = size;
1203 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
1204
1205 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1206 m_osInterface,
1207 &allocParamsForBufferLinear,
1208 &m_resPakcuLevelStreamoutData.sResource));
1209 m_resPakcuLevelStreamoutData.dwSize = size;
1210 CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
1211
1212 // Allocate SSE Source Pixel Row Store Buffer. Implementation for each tile column is shown as below:
1213 // tileWidthInLCU = ((tileWidthInLCU+3) * BYTES_PER_CACHE_LINE)*(4+4) ; tileWidthInLCU <<= 1; // double the size as RTL treats it as 10 bit data
1214 // Here, we consider each LCU column is one tile column.
1215
1216 m_sizeOfSseSrcPixelRowStoreBufferPerLcu = (CODECHAL_CACHELINE_SIZE * (4 + 4)) << 1; //size per LCU plus 10-bit
1217 size = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (picWidthInMinLCU + 3); // already aligned to cacheline size
1218 allocParamsForBufferLinear.dwBytes = size;
1219 allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
1220
1221 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1222 m_osInterface,
1223 &allocParamsForBufferLinear,
1224 &m_resSseSrcPixelRowStoreBuffer));
1225
1226 // SAO Row Store buffer, HSAO
1227 // Aligned to 4 for each tile column
1228 uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
1229 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(picWidthInMinLCU + 3 * maxTileColumn, 4) * 16;
1230 allocParamsForBufferLinear.pBufName = "SaoRowStoreBuffer";
1231
1232 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1233 m_osInterface,
1234 &allocParamsForBufferLinear,
1235 &m_SAORowStoreBuffer);
1236
1237 if (eStatus != MOS_STATUS_SUCCESS)
1238 {
1239 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO row store Buffer.");
1240 return eStatus;
1241 }
1242
1243 //HCP scalability Sync buffer
1244 size = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
1245 allocParamsForBufferLinear.dwBytes = size;
1246 allocParamsForBufferLinear.pBufName = "GEN12 Hcp scalability Sync buffer ";
1247
1248 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1249 m_osInterface,
1250 &allocParamsForBufferLinear,
1251 &m_resHcpScalabilitySyncBuffer.sResource));
1252 m_resHcpScalabilitySyncBuffer.dwSize = size;
1253
1254 // create the tile coding state parameters
1255 m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12) * maxTileNumber);
1256
1257 if (m_enableHWSemaphore)
1258 {
1259 // Create the HW sync objects which will be used by each reference frame and BRC in GEN12
1260 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1261 allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
1262
1263 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1264 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1265 lockFlagsWriteOnly.WriteOnly = 1;
1266
1267 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1268 {
1269 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1270 m_osInterface,
1271 &allocParamsForBufferLinear,
1272 &m_resBrcSemaphoreMem[i].sResource);
1273 m_resBrcSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
1274 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create BRC HW Semaphore Memory.");
1275
1276 uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1277 m_osInterface,
1278 &m_resBrcSemaphoreMem[i].sResource,
1279 &lockFlagsWriteOnly);
1280
1281 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1282
1283 *data = 1;
1284
1285 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1286 m_osInterface,
1287 &m_resBrcSemaphoreMem[i].sResource));
1288 }
1289
1290 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1291 m_osInterface,
1292 &allocParamsForBufferLinear,
1293 &m_resPipeStartSemaMem);
1294 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe start sync HW semaphore.");
1295
1296 uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1297 m_osInterface,
1298 &m_resPipeStartSemaMem,
1299 &lockFlagsWriteOnly);
1300
1301 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1302 *data = 0;
1303 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1304 m_osInterface,
1305 &m_resPipeStartSemaMem));
1306
1307 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1308 m_osInterface,
1309 &allocParamsForBufferLinear,
1310 &m_resPipeCompleteSemaMem);
1311 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe completion sync HW semaphore.");
1312
1313 data = (uint32_t *)m_osInterface->pfnLockResource(
1314 m_osInterface,
1315 &m_resPipeCompleteSemaMem,
1316 &lockFlagsWriteOnly);
1317
1318 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1319 *data = 0;
1320 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1321 m_osInterface,
1322 &m_resPipeCompleteSemaMem));
1323 }
1324
1325 if (m_hucPakStitchEnabled)
1326 {
1327 if (Mos_ResourceIsNull(&m_resHucStatus2Buffer))
1328 {
1329 // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1330 allocParamsForBufferLinear.dwBytes = sizeof(uint64_t);
1331 allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer";
1332
1333 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1334 m_osInterface->pfnAllocateResource(
1335 m_osInterface,
1336 &allocParamsForBufferLinear,
1337 &m_resHucStatus2Buffer),
1338 "%s: Failed to allocate HUC STATUS 2 Buffer\n",
1339 __FUNCTION__);
1340 }
1341
1342 uint8_t *data;
1343
1344 // Pak stitch DMEM
1345 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE);
1346 allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
1347 auto numOfPasses = CODECHAL_DP_MAX_NUM_BRC_PASSES;
1348 for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
1349 {
1350 for (auto i = 0; i < numOfPasses; i++)
1351 {
1352 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1353 m_osInterface,
1354 &allocParamsForBufferLinear,
1355 &m_resHucPakStitchDmemBuffer[j][i]);
1356
1357 if (eStatus != MOS_STATUS_SUCCESS)
1358 {
1359 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK Stitch Dmem Buffer.");
1360 return eStatus;
1361 }
1362 }
1363 }
1364 // BRC Data Buffer
1365 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1366 allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
1367
1368 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1369 m_osInterface,
1370 &allocParamsForBufferLinear,
1371 &m_resBrcDataBuffer);
1372
1373 if (eStatus != MOS_STATUS_SUCCESS)
1374 {
1375 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate BRC Data Buffer Buffer.");
1376 return eStatus;
1377 }
1378
1379 MOS_LOCK_PARAMS lockFlags;
1380 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1381 lockFlags.WriteOnly = 1;
1382
1383 data = (uint8_t *)m_osInterface->pfnLockResource(
1384 m_osInterface,
1385 &m_resBrcDataBuffer,
1386 &lockFlags);
1387
1388 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1389
1390 MOS_ZeroMemory(
1391 data,
1392 allocParamsForBufferLinear.dwBytes);
1393
1394 m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
1395 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1396 {
1397 for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1398 {
1399 // HuC stitching Data buffer
1400 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
1401 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
1402 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1403 m_osInterface->pfnAllocateResource(
1404 m_osInterface,
1405 &allocParamsForBufferLinear,
1406 &m_resHucStitchDataBuffer[i][j]));
1407
1408 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1409 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1410 lockFlagsWriteOnly.WriteOnly = 1;
1411
1412 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
1413 m_osInterface,
1414 &m_resHucStitchDataBuffer[i][j],
1415 &lockFlagsWriteOnly);
1416 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
1417 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
1418 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1419 }
1420 }
1421
1422 //Second level BB for huc stitching cmd
1423 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
1424 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
1425 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1426 m_osInterface,
1427 &m_HucStitchCmdBatchBuffer,
1428 nullptr,
1429 m_hwInterface->m_HucStitchCmdBatchBufferSize));
1430 }
1431
1432 // Pak obj and CU records for skip frame
1433 uint32_t mbCodeSize = m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE; // Must reserve at least 8 cachelines after MI_BATCH_BUFFER_END_CMD since HW prefetch max 8 cachelines from BB everytime
1434
1435 MOS_ALLOC_GFXRES_PARAMS allocParams;
1436 MOS_ZeroMemory(&allocParams, sizeof(allocParams));
1437 allocParams.Type = MOS_GFXRES_BUFFER;
1438 allocParams.Format = Format_Buffer;
1439 allocParams.TileType = MOS_TILE_LINEAR;
1440 allocParams.dwBytes = mbCodeSize;
1441 allocParams.pBufName = "skipFrameMbCodeSurface";
1442
1443 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1444 m_osInterface,
1445 &allocParams,
1446 &m_skipFrameInfo.m_resMbCodeSkipFrameSurface);
1447 if (eStatus != MOS_STATUS_SUCCESS)
1448 {
1449 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK object buffer for skip frame");
1450 return eStatus;
1451 }
1452
1453 if (m_numDelay)
1454 {
1455 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1456 allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1457
1458 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1459 m_osInterface,
1460 &allocParamsForBufferLinear,
1461 &m_resDelayMinus),
1462 "Failed to allocate delay minus memory.");
1463
1464 uint8_t * data;
1465 MOS_LOCK_PARAMS lockFlags;
1466 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1467 lockFlags.WriteOnly = 1;
1468 data = (uint8_t *)m_osInterface->pfnLockResource(
1469 m_osInterface,
1470 &m_resDelayMinus,
1471 &lockFlags);
1472
1473 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1474
1475 MOS_ZeroMemory(data, sizeof(uint32_t));
1476
1477 m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1478 }
1479
1480 return eStatus;
1481 }
1482
FreePakResources()1483 MOS_STATUS CodechalEncHevcStateG12::FreePakResources()
1484 {
1485 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1486
1487 CODECHAL_ENCODE_FUNCTION_ENTER;
1488
1489 // Release Frame Statistics Streamout Data Destination Buffer
1490 m_osInterface->pfnFreeResource(
1491 m_osInterface,
1492 &m_resFrameStatStreamOutBuffer);
1493
1494 // PAK CU Level Stream out buffer
1495 m_osInterface->pfnFreeResource(
1496 m_osInterface,
1497 &m_resPakcuLevelStreamoutData.sResource);
1498
1499 // Release SSE Source Pixel Row Store Buffer
1500 m_osInterface->pfnFreeResource(
1501 m_osInterface,
1502 &m_resSseSrcPixelRowStoreBuffer);
1503
1504 // Release Hcp scalability Sync buffer
1505 m_osInterface->pfnFreeResource(
1506 m_osInterface,
1507 &m_resHcpScalabilitySyncBuffer.sResource);
1508
1509 m_osInterface->pfnFreeResource(
1510 m_osInterface,
1511 &m_resPakcuLevelStreamoutData.sResource);
1512
1513 m_osInterface->pfnFreeResource(
1514 m_osInterface,
1515 &m_resPakSliceLevelStreamoutData.sResource);
1516
1517 m_osInterface->pfnFreeResource(
1518 m_osInterface,
1519 &m_SAORowStoreBuffer);
1520
1521 m_osInterface->pfnFreeResource(
1522 m_osInterface,
1523 &m_skipFrameInfo.m_resMbCodeSkipFrameSurface);
1524
1525 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1526 {
1527 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1528 }
1529 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1530 {
1531 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1532 }
1533 m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1534
1535 MOS_FreeMemory(m_tileParams);
1536
1537 if (m_useVirtualEngine)
1538 {
1539 for (uint32_t i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1540 {
1541 for (uint32_t j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1542 {
1543 for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1544 {
1545 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1546 if (cmdBuffer->pCmdBase)
1547 {
1548 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1549 }
1550 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1551 }
1552 }
1553 }
1554 }
1555
1556 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1557 {
1558 auto sync = &m_refSync[i];
1559
1560 if (!Mos_ResourceIsNull(&sync->resSyncObject))
1561 {
1562 // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1563 if (sync->uiSemaphoreObjCount || sync->bInUsed)
1564 {
1565 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1566 syncParams.GpuContext = m_renderContext;
1567 syncParams.presSyncResource = &sync->resSyncObject;
1568 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1569 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1570 }
1571 }
1572 m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1573 }
1574
1575 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1576 {
1577 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcSemaphoreMem[i].sResource);
1578 }
1579 m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem);
1580 m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeCompleteSemaMem);
1581
1582 if (m_hucPakStitchEnabled)
1583 {
1584 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer);
1585 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1586
1587 for (int i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1588 {
1589 for (int j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1590 {
1591 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[i][j]);
1592 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1593 }
1594 }
1595 Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
1596 }
1597 return CodechalEncHevcState::FreePakResources();
1598 }
1599
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)1600 MOS_STATUS CodechalEncHevcStateG12::GetKernelHeaderAndSize(
1601 void * binary,
1602 EncOperation operation,
1603 uint32_t krnStateIdx,
1604 void * krnHeader,
1605 uint32_t * krnSize)
1606 {
1607 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1608
1609 CODECHAL_ENCODE_FUNCTION_ENTER;
1610
1611 CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
1612 CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
1613 CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
1614
1615 PCODECHAL_HEVC_KERNEL_HEADER kernelHeaderTable = (PCODECHAL_HEVC_KERNEL_HEADER)binary;
1616
1617 PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
1618 switch (operation)
1619 {
1620 case ENC_MBENC:
1621 {
1622 switch (krnStateIdx)
1623 {
1624 case MBENC_LCU32_KRNIDX:
1625 currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU32;
1626 break;
1627
1628 case MBENC_LCU64_KRNIDX:
1629 currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU64;
1630 break;
1631
1632 default:
1633 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
1634 return MOS_STATUS_INVALID_PARAMETER;
1635 }
1636 }
1637 break;
1638
1639 case ENC_BRC:
1640 {
1641 switch (krnStateIdx)
1642 {
1643 case CODECHAL_HEVC_BRC_INIT:
1644 currKrnHeader = &kernelHeaderTable->HEVC_brc_init;
1645 break;
1646
1647 case CODECHAL_HEVC_BRC_RESET:
1648 currKrnHeader = &kernelHeaderTable->HEVC_brc_reset;
1649 break;
1650
1651 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
1652 currKrnHeader = &kernelHeaderTable->HEVC_brc_update;
1653 break;
1654
1655 case CODECHAL_HEVC_BRC_LCU_UPDATE:
1656 currKrnHeader = &kernelHeaderTable->HEVC_brc_lcuqp;
1657 break;
1658
1659 default:
1660 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested, krnStateIdx=%d", krnStateIdx);
1661 return MOS_STATUS_INVALID_PARAMETER;
1662 }
1663 break;
1664 }
1665
1666 default:
1667 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
1668 return MOS_STATUS_INVALID_PARAMETER;
1669 }
1670
1671 *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
1672
1673 PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1);
1674 PCODECHAL_KERNEL_HEADER invalidEntry = &(kernelHeaderTable->HEVC_brc_lcuqp) + 1;
1675 uint32_t nextKrnOffset = *krnSize;
1676 if (nextKrnHeader < invalidEntry)
1677 {
1678 nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
1679 }
1680 *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1681
1682 return eStatus;
1683 }
1684
InitKernelStateMbEnc()1685 MOS_STATUS CodechalEncHevcStateG12::InitKernelStateMbEnc()
1686 {
1687 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1688
1689 CODECHAL_ENCODE_FUNCTION_ENTER;
1690
1691 PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1692 m_numMbEncEncKrnStates = MBENC_NUM_KRN;
1693
1694 m_mbEncKernelStates =
1695 MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
1696 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
1697
1698 m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1699 sizeof(GenericBindingTable) * m_numMbEncEncKrnStates);
1700 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
1701
1702 PMHW_KERNEL_STATE kernelStatePtr = m_mbEncKernelStates;
1703
1704 for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++)
1705 {
1706 auto kernelSize = m_combinedKernelSize;
1707 CODECHAL_KERNEL_HEADER currKrnHeader;
1708 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1709 m_kernelBinary,
1710 ENC_MBENC,
1711 krnStateIdx,
1712 &currKrnHeader,
1713 &kernelSize));
1714
1715 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1716 ENC_MBENC,
1717 &kernelStatePtr->KernelParams,
1718 krnStateIdx));
1719
1720 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1721 ENC_MBENC,
1722 &m_mbEncKernelBindingTable[krnStateIdx],
1723 krnStateIdx));
1724
1725 kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1726 kernelStatePtr->KernelParams.pBinary =
1727 m_kernelBinary +
1728 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1729 kernelStatePtr->KernelParams.iSize = kernelSize;
1730 kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1731 kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1732 kernelStatePtr->KernelParams.iSize = kernelSize;
1733
1734 CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1735 stateHeapInterface,
1736 kernelStatePtr->KernelParams.iBTCount,
1737 &kernelStatePtr->dwSshSize,
1738 &kernelStatePtr->dwBindingTableSize));
1739
1740 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1741
1742 kernelStatePtr++;
1743 }
1744
1745 return eStatus;
1746 }
1747
InitKernelStateBrc()1748 MOS_STATUS CodechalEncHevcStateG12::InitKernelStateBrc()
1749 {
1750 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1751
1752 CODECHAL_ENCODE_FUNCTION_ENTER;
1753
1754 PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1755 m_numBrcKrnStates = CODECHAL_HEVC_BRC_NUM;
1756
1757 m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
1758 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
1759
1760 m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1761 sizeof(GenericBindingTable) * m_numBrcKrnStates);
1762
1763 PMHW_KERNEL_STATE kernelStatePtr = m_brcKernelStates;
1764
1765 kernelStatePtr++; // Skipping BRC_COARSE_INTRA as it not in Gen11
1766
1767 // KrnStateIdx initialization starts at 1 as Gen11 does not support BRC_COARSE_INTRA kernel in BRC. It is part of the Combined Common Kernel
1768 for (uint32_t krnStateIdx = 1; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
1769 {
1770 auto kernelSize = m_combinedKernelSize;
1771 CODECHAL_KERNEL_HEADER currKrnHeader;
1772
1773 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1774 m_kernelBinary,
1775 ENC_BRC,
1776 krnStateIdx,
1777 &currKrnHeader,
1778 (uint32_t *)&kernelSize));
1779
1780 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1781 ENC_BRC,
1782 &kernelStatePtr->KernelParams,
1783 krnStateIdx));
1784
1785 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1786 ENC_BRC,
1787 &m_brcKernelBindingTable[krnStateIdx],
1788 krnStateIdx));
1789
1790 kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1791 kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1792 kernelStatePtr->KernelParams.iSize = kernelSize;
1793
1794 CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1795 stateHeapInterface,
1796 kernelStatePtr->KernelParams.iBTCount,
1797 &kernelStatePtr->dwSshSize,
1798 &kernelStatePtr->dwBindingTableSize));
1799
1800 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1801
1802 kernelStatePtr++;
1803 }
1804
1805 return eStatus;
1806 }
1807
GetFrameBrcLevel()1808 MOS_STATUS CodechalEncHevcStateG12::GetFrameBrcLevel()
1809 {
1810 CODECHAL_ENCODE_FUNCTION_ENTER;
1811
1812 //if L0/L1 both points to previous frame, then its LBD otherwise its is level 1 RA B.
1813 auto B_or_LDB_brclevel = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
1814 std::map<int, HEVC_BRC_FRAME_TYPE> codingtype_to_brclevel{
1815 {I_TYPE, HEVC_BRC_FRAME_TYPE_I},
1816 {P_TYPE, HEVC_BRC_FRAME_TYPE_P_OR_LB},
1817 {B_TYPE, B_or_LDB_brclevel},
1818 {B1_TYPE, HEVC_BRC_FRAME_TYPE_B1},
1819 {B2_TYPE, HEVC_BRC_FRAME_TYPE_B2}};
1820
1821 //Both I or P/LDB type at same HierarchLevelPlus1
1822 auto intra_LDBFrame_to_Brclevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : HEVC_BRC_FRAME_TYPE_P_OR_LB;
1823 std::map<int, HEVC_BRC_FRAME_TYPE> hierchLevelPlus1_to_brclevel{
1824 {1, intra_LDBFrame_to_Brclevel},
1825 {2, HEVC_BRC_FRAME_TYPE_B},
1826 {3, HEVC_BRC_FRAME_TYPE_B1},
1827 {4, HEVC_BRC_FRAME_TYPE_B2}};
1828
1829 if (m_hevcSeqParams->HierarchicalFlag && m_hevcSeqParams->GopRefDist > 1 && m_hevcSeqParams->GopRefDist <= 8)
1830 {
1831 if (m_hevcPicParams->HierarchLevelPlus1 > 0) // LDB or RAB
1832 {
1833 m_currFrameBrcLevel = hierchLevelPlus1_to_brclevel.count(m_hevcPicParams->HierarchLevelPlus1) ? hierchLevelPlus1_to_brclevel[m_hevcPicParams->HierarchLevelPlus1] : HEVC_BRC_FRAME_TYPE_INVALID;
1834 //Invalid HierarchLevelPlus1 or LBD frames at level 3 eror check.
1835 if ((m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_INVALID) ||
1836 (m_hevcSeqParams->LowDelayMode && m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_B2))
1837 {
1838 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC_BRC_FRAME_TYPE_INVALID or LBD picture doesn't support Level 4\n");
1839 return MOS_STATUS_INVALID_PARAMETER;
1840 }
1841 }
1842 else
1843 {
1844 if (!m_hevcSeqParams->LowDelayMode) // RA B
1845 {
1846 m_currFrameBrcLevel = codingtype_to_brclevel.count(m_pictureCodingType) ? codingtype_to_brclevel[m_pictureCodingType] : HEVC_BRC_FRAME_TYPE_INVALID;
1847 //Invalid CodingType.
1848 if (m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_INVALID)
1849 {
1850 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid CodingType\n");
1851 return MOS_STATUS_INVALID_PARAMETER;
1852 }
1853 }
1854 else // Low Delay mode: Flat case
1855 {
1856 m_currFrameBrcLevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : HEVC_BRC_FRAME_TYPE_P_OR_LB;
1857 }
1858 }
1859 }
1860 else // Flat B
1861 {
1862 m_currFrameBrcLevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : B_or_LDB_brclevel;
1863 }
1864
1865 return MOS_STATUS_SUCCESS;
1866 }
1867
GetMaxBtCount()1868 uint32_t CodechalEncHevcStateG12::GetMaxBtCount()
1869 {
1870 uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
1871
1872 // BRC Init kernel
1873 uint32_t btCountPhase1 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount, btIdxAlignment);
1874
1875 // SwScoreboard kernel
1876 uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_swScoreboardState->GetBTCount(), btIdxAlignment);
1877
1878 // Csc+Ds+Conversion kernel
1879 btCountPhase2 += MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment);
1880
1881 // Intra Distortion kernel
1882 if (m_intraDistKernel)
1883 {
1884 btCountPhase2 += MOS_ALIGN_CEIL(m_intraDistKernel->GetBTCount(), btIdxAlignment);
1885 }
1886
1887 // HME 4x, 16x, 32x kernel
1888 if (m_hmeKernel)
1889 {
1890 btCountPhase2 += (MOS_ALIGN_CEIL(m_hmeKernel->GetBTCount(), btIdxAlignment) * 3);
1891 }
1892
1893 // Weighted prediction kernel
1894 btCountPhase2 += MOS_ALIGN_CEIL(m_wpState->GetBTCount(), btIdxAlignment);
1895 uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1896 MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1897 MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU32_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1898
1899 uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1900 MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1901 MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU64_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1902
1903 uint32_t maxBtCount = MOS_MAX(btCountPhase1, btCountPhase2);
1904 maxBtCount = MOS_MAX(maxBtCount, btCountPhase3);
1905 maxBtCount = MOS_MAX(maxBtCount, btCountPhase4);
1906
1907 return maxBtCount;
1908 }
1909
//!
//! \brief    Scaled-dimension calculation hook; this implementation performs no work.
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS.
//!
MOS_STATUS CodechalEncHevcStateG12::CalcScaledDimensions()
{
    // Nothing is computed here; the call only reports success.
    return MOS_STATUS_SUCCESS;
}
1914
GetMaxRefFrames(uint8_t & maxNumRef0,uint8_t & maxNumRef1)1915 void CodechalEncHevcStateG12::GetMaxRefFrames(uint8_t &maxNumRef0, uint8_t &maxNumRef1)
1916 {
1917 maxNumRef0 = m_maxNumVmeL0Ref;
1918 maxNumRef1 = m_maxNumVmeL1Ref;
1919
1920 return;
1921 }
1922
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1923 MOS_STATUS CodechalEncHevcStateG12::GetStatusReport(
1924 EncodeStatus * encodeStatus,
1925 EncodeStatusReport *encodeStatusReport)
1926 {
1927 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1928
1929 CODECHAL_ENCODE_FUNCTION_ENTER;
1930
1931 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1932 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1933
1934 if (encodeStatusReport->UsedVdBoxNumber <= 1)
1935 {
1936 return CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport);
1937 }
1938
1939 PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1940
1941 MOS_LOCK_PARAMS lockFlags;
1942 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1943 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
1944 HCPPakHWTileSizeRecord_G12 *tileStatusReport = (HCPPakHWTileSizeRecord_G12 *)m_osInterface->pfnLockResource(
1945 m_osInterface,
1946 &tileSizeStatusReport->sResource,
1947 &lockFlags);
1948 CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1949
1950 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1951 encodeStatusReport->PanicMode = false;
1952 encodeStatusReport->AverageQp = 0;
1953 encodeStatusReport->QpY = 0;
1954 encodeStatusReport->SuggestedQpYDelta = 0;
1955 encodeStatusReport->NumberPasses = 1;
1956 encodeStatusReport->bitstreamSize = 0;
1957 encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1958
1959 uint32_t totalCU = 0;
1960 double sumQp = 0.0;
1961 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1962 {
1963 if (tileStatusReport[i].Length == 0)
1964 {
1965 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1966 return eStatus;
1967 }
1968
1969 encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1970 totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1971 sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1972 }
1973
1974 encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses + 1;
1975 CODECHAL_ENCODE_VERBOSEMESSAGE("BRC Scalability Mode Exectued PAK Pass number: %d.\n", encodeStatusReport->NumberPasses);
1976
1977 if (encodeStatusReport->bitstreamSize == 0 ||
1978 encodeStatusReport->bitstreamSize > m_bitstreamUpperBound)
1979 {
1980 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1981 encodeStatusReport->bitstreamSize = 0;
1982 CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!");
1983 return MOS_STATUS_INVALID_FILE_SIZE;
1984 }
1985
1986 if (m_sseEnabled)
1987 {
1988 CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
1989 }
1990
1991 CODECHAL_ENCODE_CHK_COND_RETURN(totalCU == 0, "Invalid totalCU count");
1992 encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1993 (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1994
1995 if (m_enableTileStitchByHW)
1996 {
1997 return eStatus;
1998 }
1999
2000 uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
2001 tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
2002 CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
2003
2004 CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
2005 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2006 lockFlags.ReadOnly = 1;
2007 uint8_t *bitstream = (uint8_t *)m_osInterface->pfnLockResource(
2008 m_osInterface,
2009 &currRefList.resBitstreamBuffer,
2010 &lockFlags);
2011 if (bitstream == nullptr)
2012 {
2013 MOS_SafeFreeMemory(tempBsBuffer);
2014 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
2015 }
2016
2017 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2018 {
2019 uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
2020 uint32_t len = tileStatusReport[i].Length;
2021
2022 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
2023 bufPtr += len;
2024 }
2025
2026 MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
2027 MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
2028 m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
2029
2030 if (tempBsBuffer)
2031 {
2032 MOS_FreeMemory(tempBsBuffer);
2033 }
2034
2035 if (m_osInterface && bitstream)
2036 {
2037 m_osInterface->pfnUnlockResource(m_osInterface, &currRefList.resBitstreamBuffer);
2038 }
2039
2040 if (m_osInterface && tileStatusReport)
2041 {
2042 // clean-up the tile status report buffer
2043 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
2044
2045 m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
2046 }
2047
2048 return eStatus;
2049 }
2050
AllocateResourcesVariableSize()2051 MOS_STATUS CodechalEncHevcStateG12::AllocateResourcesVariableSize()
2052 {
2053 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2054
2055 CODECHAL_ENCODE_FUNCTION_ENTER;
2056
2057 if (!m_hevcPicParams->tiles_enabled_flag)
2058 {
2059 return eStatus;
2060 }
2061
2062 uint32_t bufSize = 0;
2063 if (m_pakPiplStrmOutEnable)
2064 {
2065 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
2066 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
2067 uint32_t tileWidthInCus = 0;
2068 uint32_t tileHeightInCus = 0;
2069 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2070 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
2071 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2072 {
2073 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2074 {
2075 uint32_t idx = tileRow * numTileColumns + tileCol;
2076
2077 tileHeightInCus = m_tileParams[idx].TileHeightInMinCbMinus1 + 1;
2078 tileWidthInCus = m_tileParams[idx].TileWidthInMinCbMinus1 + 1;
2079 bufSize += (tileWidthInCus * tileHeightInCus * 16);
2080 bufSize = MOS_ALIGN_CEIL(bufSize, CODECHAL_CACHELINE_SIZE);
2081 }
2082 }
2083 if (Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ||
2084 (bufSize > m_resPakcuLevelStreamoutData.dwSize))
2085 {
2086 if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
2087 {
2088 m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
2089 }
2090
2091 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2092 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2093 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
2094 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2095 allocParamsForBufferLinear.Format = Format_Buffer;
2096 allocParamsForBufferLinear.dwBytes = bufSize;
2097 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
2098
2099 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
2100 m_osInterface,
2101 &allocParamsForBufferLinear,
2102 &m_resPakcuLevelStreamoutData.sResource));
2103 m_resPakcuLevelStreamoutData.dwSize = bufSize;
2104 CODECHAL_ENCODE_VERBOSEMESSAGE("reallocate cu steam out buffer, size=0x%x.\n", bufSize);
2105 }
2106 }
2107
2108 return eStatus;
2109 }
2110
ExecutePictureLevel()2111 MOS_STATUS CodechalEncHevcStateG12::ExecutePictureLevel()
2112 {
2113 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2114
2115 m_firstTaskInPhase = m_singleTaskPhaseSupported ? IsFirstPass() : true;
2116 m_lastTaskInPhase = m_singleTaskPhaseSupported ? IsLastPass() : true;
2117
2118 PerfTagSetting perfTag;
2119 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
2120
2121 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
2122
2123 if (!m_singleTaskPhaseSupportedInPak)
2124 {
2125 // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
2126 m_firstTaskInPhase = true;
2127 m_lastTaskInPhase = true;
2128 }
2129
2130 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())
2131 {
2132 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
2133 eStatus = MOS_STATUS_INVALID_PARAMETER;
2134 return eStatus;
2135 }
2136
2137 MOS_COMMAND_BUFFER cmdBuffer;
2138 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2139
2140 if ((!m_singleTaskPhaseSupported) || m_firstTaskInPhase)
2141 {
2142 // Send command buffer header at the beginning (OS dependent)
2143 // frame tracking tag is only added in the last command buffer header
2144 bool bRequestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
2145
2146 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, bRequestFrameTracking));
2147 }
2148
2149 // clean-up per VDBOX semaphore memory
2150 int32_t currentPipe = GetCurrentPipe();
2151 if (currentPipe < 0)
2152 {
2153 eStatus = MOS_STATUS_INVALID_PARAMETER;
2154 return eStatus;
2155 }
2156
2157 if (m_numPipe >= 2 &&
2158 ((m_singleTaskPhaseSupported && IsFirstPass()) ||
2159 !m_singleTaskPhaseSupported))
2160 {
2161 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
2162 //HW Semaphore cmd to make sure all pipes start encode at the same time
2163 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2164 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2165 &m_resPipeStartSemaMem,
2166 &cmdBuffer,
2167 m_numPipe));
2168
2169 // Program some placeholder cmds to resolve the hazard between BEs sync
2170 MHW_MI_STORE_DATA_PARAMS dataParams;
2171 dataParams.pOsResource = &m_resDelayMinus;
2172 dataParams.dwResourceOffset = 0;
2173 dataParams.dwValue = 0xDE1A;
2174 for (uint32_t i = 0; i < m_numDelay; i++)
2175 {
2176 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2177 &cmdBuffer,
2178 &dataParams));
2179 }
2180
2181 //clean HW semaphore memory
2182 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
2183
2184 //Start Watchdog Timer
2185 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
2186
2187 //To help test media reset, this hw semaphore wait will never be reached.
2188 if (m_enableTestMediaReset)
2189 {
2190 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2191 &m_resPipeStartSemaMem,
2192 &cmdBuffer,
2193 m_numPipe + 2));
2194 }
2195 }
2196
2197 if (m_brcEnabled && !IsFirstPass()) // Only the regular BRC passes have the conditional batch buffer end
2198 {
2199 // Ensure the previous PAK BRC pass is done, mainly for pipes other than pipe0.
2200 if (m_singleTaskPhaseSupported && m_numPipe >= 2 &&
2201 !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
2202 {
2203 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2204 SendHWWaitCommand(
2205 &m_resBrcSemaphoreMem[currentPipe].sResource,
2206 &cmdBuffer,
2207 1));
2208 }
2209
2210 // Insert conditional batch buffer end
2211 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2212 MOS_ZeroMemory(
2213 &miConditionalBatchBufferEndParams,
2214 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2215 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2216 sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource ;
2217
2218 if (m_hucPakStitchEnabled && m_numPipe >= 2) //BRC scalability
2219 {
2220 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned
2221 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwHuCStatusRegOffset);
2222
2223 miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2224 miConditionalBatchBufferEndParams.dwOffset = baseOffset + m_encodeStatusBuf.dwHuCStatusMaskOffset;
2225 }
2226 else
2227 {
2228 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned
2229 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwImageStatusCtrlOffset);
2230
2231 miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2232 miConditionalBatchBufferEndParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusMaskOffset;
2233 }
2234
2235 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2236 &cmdBuffer,
2237 &miConditionalBatchBufferEndParams));
2238
2239 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2240 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2241 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
2242 if (m_hucPakStitchEnabled && m_numPipe >= 2)
2243 {
2244 // Write back the HCP image control register with HUC PAK Int Kernel output
2245 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2246 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2247 miLoadRegMemParams.presStoreBuffer = &m_resBrcDataBuffer;
2248 miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
2249 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2250 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2251
2252 if (IsFirstPipe())
2253 {
2254 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
2255 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
2256 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
2257 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
2258 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2259 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
2260
2261 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2262 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2263 miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2264 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2265 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2266 }
2267 }
2268 else
2269 {
2270 // Write back the HCP image control register for RC6 may clean it out
2271 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2272 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2273 miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2274 miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2275 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2276 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2277
2278 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2279 miStoreRegMemParams.presStoreBuffer = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
2280 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2281 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2282 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2283
2284 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2285 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2286 miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2287 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2288 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2289 }
2290 }
2291
2292 if (IsFirstPipe() && IsFirstPass() && m_osInterface->bTagResourceSync)
2293 {
2294 // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2295 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2296 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2297 // as long as Dec/VP/Enc won't depend on this PAK so soon.
2298
2299 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2300 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2301 m_osInterface,
2302 globalGpuContextSyncTagBuffer));
2303 CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2304
2305 MHW_MI_STORE_DATA_PARAMS params;
2306 params.pOsResource = globalGpuContextSyncTagBuffer;
2307 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2308 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2309 params.dwValue = (value > 0) ? (value - 1) : 0;
2310 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
2311 }
2312
2313 if (IsFirstPipe())
2314 {
2315 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2316 }
2317
2318 if (m_numPipe >= 2)
2319 {
2320 // clean up hw semaphore for BRC PAK pass sync, used only in single task phase.
2321 if (m_singleTaskPhaseSupported &&
2322 m_brcEnabled &&
2323 !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
2324 {
2325 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2326 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2327 storeDataParams.pOsResource = &m_resBrcSemaphoreMem[currentPipe].sResource;
2328 storeDataParams.dwResourceOffset = 0;
2329 storeDataParams.dwValue = 0;
2330
2331 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2332 &cmdBuffer,
2333 &storeDataParams));
2334 }
2335 }
2336
2337 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelectCmd(&cmdBuffer));
2338
2339 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpSurfaceStateCmds(&cmdBuffer));
2340
2341 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer));
2342
2343 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2344 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2345 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2346
2347 MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2348 SetHcpQmStateParams(fqmParams, qmParams);
2349 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2350 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2351
2352 if (m_brcEnabled)
2353 {
2354 uint32_t picStateCmdOffset;
2355 if (m_hucPakStitchEnabled && m_numPipe >= 2)
2356 {
2357 //for non fist PAK pass, always use the 2nd HCP PIC STATE cmd buffer
2358 picStateCmdOffset = IsFirstPass() ? 0 : 1;
2359 }
2360 else
2361 {
2362 picStateCmdOffset = GetCurrentPass();
2363 }
2364
2365 MOS_RESOURCE &brcHcpStateWriteBuffer = m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];
2366 if (IsPanicModePass())
2367 {
2368 // BRC kernel supports only 4 BrcImageStates read/write buffers.
2369 // So for panic PAK pass use HCP_PIC_STATE command from previous PAK pass.
2370 picStateCmdOffset -= 1;
2371 }
2372
2373 MHW_BATCH_BUFFER batchBuffer;
2374 MOS_ZeroMemory(&batchBuffer, sizeof(batchBuffer));
2375 batchBuffer.OsResource = brcHcpStateWriteBuffer;
2376 batchBuffer.dwOffset = picStateCmdOffset * BRC_IMG_STATE_SIZE_PER_PASS_G12;
2377 batchBuffer.bSecondLevel = true;
2378
2379 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2380 &cmdBuffer,
2381 &batchBuffer));
2382 }
2383 else
2384 {
2385 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPictureStateCmd(&cmdBuffer));
2386 }
2387
2388 // Send HEVC_VP9_RDOQ_STATE command
2389 if (m_hevcRdoqEnabled)
2390 {
2391 MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2392 SetHcpPicStateParams(picStateParams);
2393
2394 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2395 }
2396
2397 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2398 return eStatus;
2399 }
2400
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState)2401 void CodechalEncHevcStateG12::SetHcpSliceStateCommonParams(
2402 MHW_VDBOX_HEVC_SLICE_STATE &sliceState)
2403 {
2404 CodechalEncHevcState::SetHcpSliceStateCommonParams(sliceState);
2405
2406 sliceState.RoundingIntra = m_roundingIntraInUse;
2407 sliceState.RoundingInter = m_roundingInterInUse;
2408
2409 if ((m_hevcSliceParams->slice_type == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
2410 (m_hevcSliceParams->slice_type == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag))
2411 {
2412 sliceState.bWeightedPredInUse = true;
2413 }
2414 else
2415 {
2416 sliceState.bWeightedPredInUse = false;
2417 }
2418
2419 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).dwNumPipe = m_numPipe;
2420
2421 sliceState.presDataBuffer = IsPanicModePass() ? &m_skipFrameInfo.m_resMbCodeSkipFrameSurface : &m_resMbCodeSurface;
2422 }
2423
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,bool lastSliceInTile,uint32_t idx)2424 void CodechalEncHevcStateG12::SetHcpSliceStateParams(
2425 MHW_VDBOX_HEVC_SLICE_STATE & sliceState,
2426 PCODEC_ENCODER_SLCDATA slcData,
2427 uint16_t slcCount,
2428 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,
2429 bool lastSliceInTile,
2430 uint32_t idx)
2431 {
2432 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2433
2434 sliceState.pEncodeHevcSliceParams = &m_hevcSliceParams[slcCount];
2435 sliceState.dwDataBufferOffset = slcData[slcCount].CmdOffset;
2436 sliceState.dwOffset = slcData[slcCount].SliceOffset;
2437 sliceState.dwLength = slcData[slcCount].BitSize;
2438 sliceState.uiSkipEmulationCheckCount = slcData[slcCount].SkipEmulationByteCount;
2439 sliceState.dwSliceIndex = (uint32_t)slcCount;
2440 sliceState.bLastSlice = (slcCount == m_numSlices - 1);
2441 sliceState.bLastSliceInTile = lastSliceInTile;
2442 sliceState.bLastSliceInTileColumn = (bool)lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn;
2443 sliceState.bFirstPass = IsFirstPass();
2444 sliceState.bLastPass = IsLastPass();
2445 sliceState.bInsertBeforeSliceHeaders = (slcCount == 0);
2446 sliceState.bSaoLumaFlag = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_luma_flag : 0;
2447 sliceState.bSaoChromaFlag = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_chroma_flag : 0;
2448 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).pTileCodingParams = tileCodingParams + idx;
2449 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).dwTileID = idx;
2450
2451 sliceState.DeblockingFilterDisable = m_hevcSliceParams[slcCount].slice_deblocking_filter_disable_flag;
2452 sliceState.TcOffsetDiv2 = m_hevcSliceParams[slcCount].tc_offset_div2;
2453 sliceState.BetaOffsetDiv2 = m_hevcSliceParams[slcCount].beta_offset_div2;
2454
2455 CalcTransformSkipParameters(sliceState.EncodeHevcTransformSkipParams);
2456 }
2457
SetMfxVideoCopyCmdParams(PMOS_COMMAND_BUFFER cmdBuffer)2458 MOS_STATUS CodechalEncHevcStateG12::SetMfxVideoCopyCmdParams(
2459 PMOS_COMMAND_BUFFER cmdBuffer)
2460 {
2461 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2462
2463 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface);
2464 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface());
2465 MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface();
2466
2467 uint32_t index = m_virtualEngineBbIndex;
2468
2469 MHW_CP_COPY_PARAMS cpCopyParams;
2470 MOS_ZeroMemory(&cpCopyParams, sizeof(cpCopyParams));
2471
2472 cpCopyParams.size = m_hwInterface->m_tileRecordSize;
2473 cpCopyParams.presSrc = &m_tileRecordBuffer[index].sResource;
2474 cpCopyParams.presDst = &m_resBitstreamBuffer;
2475 cpCopyParams.lengthOfTable = (uint8_t)(m_numTiles);
2476 cpCopyParams.isEncodeInUse = true;
2477 CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->SetCpCopy(m_osInterface, cmdBuffer, &cpCopyParams));
2478
2479 return eStatus;
2480 }
2481
ExecuteSliceLevel()2482 MOS_STATUS CodechalEncHevcStateG12::ExecuteSliceLevel()
2483 {
2484 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2485
2486 CODECHAL_ENCODE_FUNCTION_ENTER;
2487
2488 CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
2489
2490 if (m_pakOnlyTest)
2491 {
2492 CODECHAL_ENCODE_CHK_STATUS_RETURN(LoadPakCommandAndCuRecordFromFile());
2493 }
2494
2495 if (!m_hevcPicParams->tiles_enabled_flag)
2496 {
2497 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::ExecuteSliceLevel());
2498 }
2499 else
2500 {
2501 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
2502 }
2503
2504 return eStatus;
2505 }
2506
EncTileLevel()2507 MOS_STATUS CodechalEncHevcStateG12::EncTileLevel()
2508 {
2509 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2510
2511 CODECHAL_ENCODE_FUNCTION_ENTER;
2512
2513 int32_t currentPipe = GetCurrentPipe();
2514 int32_t currentPass = GetCurrentPass();
2515
2516 if (currentPipe < 0 || currentPass < 0)
2517 {
2518 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
2519 return MOS_STATUS_INVALID_PARAMETER;
2520 }
2521
2522 MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
2523 SetHcpSliceStateCommonParams(sliceState);
2524
2525 MOS_COMMAND_BUFFER cmdBuffer;
2526 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2527
2528 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2529 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
2530
2531 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2532 {
2533 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2534 {
2535 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2536 uint32_t slcCount, idx, sliceNumInTile = 0;
2537
2538 idx = tileRow * numTileColumns + tileCol;
2539
2540 if ((m_numPipe > 1) && (tileCol != currentPipe))
2541 {
2542 continue;
2543 }
2544
2545 // HCP_TILE_CODING commmand
2546 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2547 static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx]));
2548
2549 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
2550 {
2551 bool lastSliceInTile = false, sliceInTile = false;
2552
2553 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
2554 &m_tileParams[idx],
2555 &sliceInTile,
2556 &lastSliceInTile));
2557
2558 if (!sliceInTile)
2559 {
2560 continue;
2561 }
2562
2563 if (IsFirstPass())
2564 {
2565 uint32_t startLcu = 0;
2566 for (uint32_t ii = 0; ii < slcCount; ii++)
2567 {
2568 startLcu += m_hevcSliceParams[ii].NumLCUsInSlice;
2569 }
2570 slcData[slcCount].CmdOffset = startLcu * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t);
2571 }
2572
2573 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx);
2574
2575 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState));
2576
2577 sliceNumInTile++;
2578 } // end of slice
2579
2580 if (0 == sliceNumInTile)
2581 {
2582 // One tile must have at least one slice
2583 CODECHAL_ENCODE_ASSERT(false);
2584 eStatus = MOS_STATUS_INVALID_PARAMETER;
2585 return eStatus;
2586 }
2587 } // end of row tile
2588 } // end of column tile
2589
2590 // Insert end of sequence/stream if set
2591 if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
2592 {
2593 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2594 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2595 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2596 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2597 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2598 }
2599
2600 // Send VD_PIPELINE_FLUSH command
2601 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2602 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2603 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2604 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2605 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2606 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2607
2608 // Send MI_FLUSH command
2609 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2610 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2611 flushDwParams.bVideoPipelineCacheInvalidate = true;
2612 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2613
2614 //HW Semaphore cmd to make sure all pipes completion encode
2615 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeCompleteSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2616
2617 if (IsFirstPipe())
2618 {
2619 // first pipe needs to ensure all other pipes are ready
2620 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2621 &m_resPipeCompleteSemaMem,
2622 &cmdBuffer,
2623 m_numPipe));
2624
2625 //clean HW semaphore memory
2626 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2627 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2628 storeDataParams.pOsResource = &m_resPipeCompleteSemaMem;
2629 storeDataParams.dwValue = 0;
2630 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2631 &cmdBuffer,
2632 &storeDataParams));
2633
2634 // Use HW stitch commands only in the scalable mode
2635 if (m_numPipe > 1 && m_enableTileStitchByHW)
2636 {
2637 //call PAK Int Kernel in scalability case
2638 if (m_hucPakStitchEnabled)
2639 {
2640 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
2641 #if 0 // Need to enable this code once Gen12 becomes open source \
2642 // 2nd level BB buffer for stitching cmd \
2643 // current location to add cmds in 2nd level batch buffer
2644 m_HucStitchCmdBatchBuffer.iCurrent = 0;
2645 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2646 m_HucStitchCmdBatchBuffer.dwOffset = 0;
2647 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
2648 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
2649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
2650 #endif
2651 }
2652 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMfxVideoCopyCmdParams(&cmdBuffer));
2653 }
2654
2655 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2656
2657 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2658
2659 if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2660 {
2661 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2662
2663 // BRC PAK statistics different for each pass
2664 if (m_brcEnabled)
2665 {
2666 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
2667 }
2668 }
2669 else
2670 { //scalability mode
2671 if (m_brcEnabled)
2672 {
2673 //MMIO register is not used in scalability BRC case. all information is in TileSizeRecord stream out buffer
2674 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatisticsForScalability(&cmdBuffer));
2675 }
2676 else
2677 {
2678 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2679 }
2680 }
2681
2682 #if (_DEBUG || _RELEASE_INTERNAL)
2683 //this is to support BRC scalbility test to match with single pipe. Will be removed later after enhanced BRC Scalability is enabled.
2684 if (m_brcEnabled && m_forceSinglePakPass)
2685 {
2686 CODECHAL_ENCODE_CHK_STATUS_RETURN(ResetImgCtrlRegInPAKStatisticsBuffer(&cmdBuffer));
2687 }
2688 #endif
2689
2690 if (m_singleTaskPhaseSupported &&
2691 m_brcEnabled && m_numPipe >= 2 && !IsLastPass())
2692 {
2693 // Signal HW semaphore for the BRC dependency (i.e., next BRC pass waits for the current BRC pass)
2694 for (auto i = 0; i < m_numPipe; i++)
2695 {
2696 if (!Mos_ResourceIsNull(&m_resBrcSemaphoreMem[i].sResource))
2697 {
2698 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2699 storeDataParams.pOsResource = &m_resBrcSemaphoreMem[i].sResource;
2700 storeDataParams.dwValue = 1;
2701
2702 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2703 &cmdBuffer,
2704 &storeDataParams));
2705 }
2706 }
2707 }
2708 }
2709
2710 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2711 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2712
2713 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2714 {
2715 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2716 }
2717
2718 std::string pakPassName = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass));
2719 CODECHAL_DEBUG_TOOL(
2720 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
2721 &cmdBuffer,
2722 CODECHAL_NUM_MEDIA_STATES,
2723 pakPassName.data()));)
2724
2725 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2726
2727 if (IsFirstPipe() &&
2728 (m_pakOnlyTest == 0) && // In the PAK only test, no need to wait for ENC's completion
2729 IsFirstPass() &&
2730 !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2731 {
2732 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2733 syncParams.GpuContext = m_videoContext;
2734 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2735
2736 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2737 }
2738
2739 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2740 {
2741 bool nullRendering = m_videoContextUsesNullHw;
2742
2743 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
2744
2745 CODECHAL_DEBUG_TOOL(
2746 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2747 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpPakOutput());
2748 if (m_mmcState) {
2749 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2750 })
2751
2752 if ((IsLastPipe()) &&
2753 (IsLastPass()) &&
2754 m_signalEnc &&
2755 m_currRefSync &&
2756 !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2757 {
2758 // signal semaphore
2759 MOS_SYNC_PARAMS syncParams;
2760 syncParams = g_cInitSyncParams;
2761 syncParams.GpuContext = m_videoContext;
2762 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2763
2764 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2765 m_currRefSync->uiSemaphoreObjCount++;
2766 m_currRefSync->bInUsed = true;
2767 }
2768 }
2769
2770 // Reset parameters for next PAK execution
2771 if (IsLastPipe() && IsLastPass())
2772 {
2773 if (!m_singleTaskPhaseSupported)
2774 {
2775 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2776 }
2777
2778 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2779
2780 if (m_hevcSeqParams->ParallelBRC)
2781 {
2782 m_brcBuffers.uiCurrBrcPakStasIdxForWrite =
2783 (m_brcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2784 }
2785
2786 m_newPpsHeader = 0;
2787 m_newSeqHeader = 0;
2788 m_frameNum++;
2789 }
2790
2791 return eStatus;
2792 }
2793
DecideEncodingPipeNumber()2794 MOS_STATUS CodechalEncHevcStateG12::DecideEncodingPipeNumber()
2795 {
2796 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2797
2798 CODECHAL_ENCODE_FUNCTION_ENTER;
2799
2800 m_numPipe = m_numVdbox;
2801
2802 uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2803
2804 if (numTileColumns > m_numPipe)
2805 {
2806 m_numPipe = 1;
2807 }
2808
2809 if (numTileColumns < m_numPipe)
2810 {
2811 if (numTileColumns >= 1 && numTileColumns <= 4)
2812 {
2813 m_numPipe = numTileColumns;
2814 }
2815 else
2816 {
2817 m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
2818 }
2819 }
2820
2821 m_useVirtualEngine = true; //always use virtual engine interface for single pipe and scalability mode
2822
2823 if (!m_forceScalability)
2824 {
2825 //resolution < 4K, always go with single pipe
2826 if (m_frameWidth * m_frameHeight < ENCODE_HEVC_4K_PIC_WIDTH * ENCODE_HEVC_4K_PIC_HEIGHT)
2827 {
2828 m_numPipe = 1;
2829 }
2830 }
2831
2832 m_numUsedVdbox = m_numPipe;
2833 m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
2834
2835 if (m_scalabilityState)
2836 {
2837 // Create/ re-use a GPU context with 2 pipes
2838 m_scalabilityState->ucScalablePipeNum = m_numPipe;
2839 }
2840 return eStatus;
2841 }
2842
PlatformCapabilityCheck()2843 MOS_STATUS CodechalEncHevcStateG12::PlatformCapabilityCheck()
2844 {
2845 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2846
2847 CODECHAL_ENCODE_FUNCTION_ENTER;
2848
2849 CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
2850
2851 if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2852 {
2853 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState, (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2854 }
2855
2856 if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_16K_PIC_WIDTH * ENCODE_HEVC_MAX_16K_PIC_HEIGHT)
2857 {
2858 eStatus = MOS_STATUS_INVALID_PARAMETER;
2859 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 16k not supported");
2860 }
2861
2862 if (m_vdencEnabled && m_chromaFormat == HCP_CHROMA_FORMAT_YUV444 && m_hevcSeqParams->TargetUsage == 7)
2863 {
2864 CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n");
2865 m_hevcSeqParams->TargetUsage = 4;
2866 }
2867
2868 if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
2869 (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat &&
2870 Format_YUY2 == m_reconSurface.Format)
2871 {
2872 if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
2873 m_reconSurface.dwWidth < m_oriFrameWidth / 2)
2874 {
2875 return MOS_STATUS_INVALID_PARAMETER;
2876 }
2877 }
2878
2879 // set RDOQ Intra blocks Threshold for Gen11+
2880 m_rdoqIntraTuThreshold = 0;
2881 if (m_hevcRdoqEnabled)
2882 {
2883 if (1 == m_hevcSeqParams->TargetUsage)
2884 {
2885 m_rdoqIntraTuThreshold = 0xffff;
2886 }
2887 else if (4 == m_hevcSeqParams->TargetUsage)
2888 {
2889 m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
2890 m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
2891 }
2892 }
2893
2894 return eStatus;
2895 }
2896
CheckSupportedFormat(PMOS_SURFACE surface)2897 bool CodechalEncHevcStateG12::CheckSupportedFormat(PMOS_SURFACE surface)
2898 {
2899 CODECHAL_ENCODE_FUNCTION_ENTER;
2900
2901 bool isColorFormatSupported = false;
2902
2903 if (nullptr == surface)
2904 {
2905 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
2906 return isColorFormatSupported;
2907 }
2908
2909 switch (surface->Format)
2910 {
2911 case Format_NV12:
2912 isColorFormatSupported = IS_Y_MAJOR_TILE_FORMAT(surface->TileType);
2913 break;
2914 case Format_YUY2:
2915 case Format_YUYV:
2916 case Format_A8R8G8B8:
2917 case Format_P010:
2918 case Format_P016:
2919 case Format_Y210:
2920 case Format_Y216:
2921 break;
2922 default:
2923 CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
2924 break;
2925 }
2926
2927 return isColorFormatSupported;
2928 }
2929
GetSystemPipeNumberCommon()2930 MOS_STATUS CodechalEncHevcStateG12::GetSystemPipeNumberCommon()
2931 {
2932 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2933
2934 CODECHAL_ENCODE_FUNCTION_ENTER;
2935
2936 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
2937 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2938
2939 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
2940 statusKey = MOS_UserFeature_ReadValue_ID(
2941 nullptr,
2942 __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
2943 &userFeatureData,
2944 m_osInterface->pOsContext);
2945
2946 bool disableScalability = true; // m_hwInterface->IsDisableScalability()
2947 if (statusKey == MOS_STATUS_SUCCESS)
2948 {
2949 disableScalability = userFeatureData.i32Data ? true : false;
2950 }
2951
2952 MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
2953 CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
2954
2955 if (gtSystemInfo && disableScalability == false)
2956 {
2957 // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
2958 m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
2959 }
2960 else
2961 {
2962 m_numVdbox = 1;
2963 }
2964
2965 return eStatus;
2966 }
2967
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)2968 MOS_STATUS CodechalEncHevcStateG12::HucPakIntegrate(
2969 PMOS_COMMAND_BUFFER cmdBuffer)
2970 {
2971 CODECHAL_ENCODE_FUNCTION_ENTER;
2972
2973 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2974
2975 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2976
2977 CODECHAL_ENCODE_CHK_COND_RETURN(
2978 (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
2979 "ERROR - vdbox index exceed the maximum");
2980
2981 auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
2982
2983 // load kernel from WOPCM into L2 storage RAM
2984 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
2985 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
2986 imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
2987
2988 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
2989
2990 // pipe mode select
2991 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2992 pipeModeSelectParams.Mode = m_mode;
2993 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2994
2995 // DMEM set
2996 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
2997 if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
2998 {
2999 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
3000 }
3001 else
3002 {
3003 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateCqp(&dmemParams));
3004 }
3005 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
3006
3007 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
3008 if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
3009 {
3010 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
3011 }
3012 else
3013 {
3014 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateCqp(&virtualAddrParams));
3015 }
3016 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
3017
3018 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
3019 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3020 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3021 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
3022 storeDataParams.dwResourceOffset = 0;
3023 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
3024 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
3025
3026 // Store HUC_STATUS2 register
3027 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
3028 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3029 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
3030 storeRegParams.dwOffset = sizeof(uint32_t);
3031 storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
3032 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
3033
3034 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
3035
3036 // wait Huc completion (use HEVC bit for now)
3037 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
3038 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
3039 vdPipeFlushParams.Flags.bFlushHEVC = 1;
3040 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
3041 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
3042
3043 // Flush the engine to ensure memory written out
3044 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3045 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3046 flushDwParams.bVideoPipelineCacheInvalidate = true;
3047 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
3048
3049 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
3050
3051 uint32_t baseOffset =
3052 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
3053
3054 // Write HUC_STATUS mask
3055 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3056 storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
3057 storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
3058 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
3059 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
3060 cmdBuffer,
3061 &storeDataParams));
3062
3063 // store HUC_STATUS register
3064 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3065 storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
3066 storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
3067 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
3068 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
3069 cmdBuffer,
3070 &storeRegParams));
3071
3072 return eStatus;
3073 }
3074
Initialize(CodechalSetting * settings)3075 MOS_STATUS CodechalEncHevcStateG12::Initialize(CodechalSetting *settings)
3076 {
3077 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3078
3079 CODECHAL_ENCODE_FUNCTION_ENTER;
3080
3081 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
3082 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
3083
3084 #if (_DEBUG || _RELEASE_INTERNAL)
3085 char stringData[MOS_USER_CONTROL_MAX_DATA_SIZE];
3086 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3087 userFeatureData.StringData.pStringData = stringData;
3088 statusKey = MOS_UserFeature_ReadValue_ID(
3089 nullptr,
3090 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID,
3091 &userFeatureData,
3092 m_osInterface->pOsContext);
3093
3094 if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
3095 {
3096 MOS_SecureStrcpy(m_pakOnlyDataFolder,
3097 sizeof(m_pakOnlyDataFolder) / sizeof(m_pakOnlyDataFolder[0]),
3098 stringData);
3099
3100 uint32_t len = strlen(m_pakOnlyDataFolder);
3101 if (m_pakOnlyDataFolder[len - 1] == '\\')
3102 {
3103 m_pakOnlyDataFolder[len - 1] = 0;
3104 }
3105
3106 m_pakOnlyTest = true;
3107 // PAK only mode does not need to init any kernel
3108 }
3109
3110 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3111 userFeatureData.StringData.pStringData = stringData;
3112 statusKey = MOS_UserFeature_ReadValue_ID(
3113 nullptr,
3114 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_LOAD_KERNEL_INPUT_ID,
3115 &userFeatureData,
3116 m_osInterface->pOsContext);
3117
3118 if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
3119 {
3120 MOS_SecureStrcpy(m_loadKernelInputDataFolder,
3121 sizeof(m_loadKernelInputDataFolder) / sizeof(m_loadKernelInputDataFolder[0]),
3122 stringData);
3123
3124 uint32_t len = strlen(m_loadKernelInputDataFolder);
3125 if (m_loadKernelInputDataFolder[len - 1] == '\\')
3126 {
3127 m_loadKernelInputDataFolder[len - 1] = 0;
3128 }
3129 m_loadKernelInput = true;
3130 }
3131 #endif
3132
3133 // Common initialization
3134 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
3135
3136 m_numDelay = 15; //Value suggested by HW team.
3137 m_bmeMethodTable = (uint8_t *)m_meMethod;
3138 m_b4XMeDistortionBufferSupported = true;
3139 m_brcBuffers.dwBrcConstantSurfaceWidth = HEVC_BRC_CONSTANT_SURFACE_WIDTH_G9;
3140 m_brcBuffers.dwBrcConstantSurfaceHeight = HEVC_BRC_CONSTANT_SURFACE_HEIGHT_G10;
3141 m_brcHistoryBufferSize = HEVC_BRC_HISTORY_BUFFER_SIZE_G12;
3142 m_maxNumSlicesSupported = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6;
3143 m_brcBuffers.dwBrcHcpPicStateSize = BRC_IMG_STATE_SIZE_PER_PASS_G12 * CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES;
3144
3145 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3146 MOS_UserFeature_ReadValue_ID(
3147 nullptr,
3148 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
3149 &userFeatureData,
3150 m_osInterface->pOsContext);
3151 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
3152
3153 // Max ConcurrentGroup used in the ENC kernel
3154 m_numberConcurrentGroup = 4;
3155
3156 m_sizeOfHcpPakFrameStats = 9 * CODECHAL_CACHELINE_SIZE; //Frame statistics occupying 9 caceline on gen12
3157
3158 // Max Subthread number used in the ENC kernel
3159 m_numberEncKernelSubThread = 3;
3160
3161 if (m_numberEncKernelSubThread > m_hevcThreadTaskDataNum)
3162 {
3163 m_numberEncKernelSubThread = m_hevcThreadTaskDataNum; // support up to 2 sub-threads in one LCU64x64
3164 }
3165
3166 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3167 MOS_UserFeature_ReadValue_ID(
3168 nullptr,
3169 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
3170 &userFeatureData,
3171 m_osInterface->pOsContext);
3172 m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
3173
3174 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3175 MOS_UserFeature_ReadValue_ID(
3176 nullptr,
3177 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
3178 &userFeatureData,
3179 m_osInterface->pOsContext);
3180 m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
3181
3182 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3183 MOS_UserFeature_ReadValue_ID(
3184 nullptr,
3185 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_ENCODE_SSE_ENABLE_ID,
3186 &userFeatureData,
3187 m_osInterface->pOsContext);
3188 m_sseSupported = userFeatureData.i32Data ? true : false;
3189
3190 // Overriding the defaults here with 32 aligned dimensions
3191 // 2x Scaling WxH
3192 m_downscaledWidth2x =
3193 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth);
3194 m_downscaledHeight2x =
3195 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight);
3196
3197 // HME Scaling WxH
3198 m_downscaledWidth4x =
3199 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth);
3200 m_downscaledHeight4x =
3201 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight);
3202 m_downscaledWidthInMb4x =
3203 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
3204 m_downscaledHeightInMb4x =
3205 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x);
3206
3207 // SuperHME Scaling WxH
3208 m_downscaledWidth16x =
3209 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x);
3210 m_downscaledHeight16x =
3211 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x);
3212 m_downscaledWidthInMb16x =
3213 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
3214 m_downscaledHeightInMb16x =
3215 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x);
3216
3217 // UltraHME Scaling WxH
3218 m_downscaledWidth32x =
3219 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x);
3220 m_downscaledHeight32x =
3221 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x);
3222 m_downscaledWidthInMb32x =
3223 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
3224 m_downscaledHeightInMb32x =
3225 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x);
3226
3227 // disable MMCD if we enable Codechal dump. Because dump code changes the surface state from compressed to uncompressed,
3228 // this causes mis-match issue between dump is enabled or disabled.
3229 CODECHAL_DEBUG_TOOL(
3230 if (m_mmcState && m_debugInterface && m_debugInterface->m_dbgCfgHead){
3231 //m_mmcState->SetMmcDisabled();
3232 })
3233
3234 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
3235
3236 if (MOS_VE_SUPPORTED(m_osInterface))
3237 {
3238 m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
3239 CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
3240 //scalability initialize
3241 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
3242 }
3243
3244 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3245 statusKey = MOS_UserFeature_ReadValue_ID(
3246 nullptr,
3247 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
3248 &userFeatureData,
3249 m_osInterface->pOsContext);
3250 m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
3251
3252 statusKey = MOS_STATUS_SUCCESS;
3253 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3254 statusKey = MOS_UserFeature_ReadValue_ID(
3255 nullptr,
3256 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
3257 &userFeatureData,
3258 m_osInterface->pOsContext);
3259 m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
3260
3261 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3262 statusKey = MOS_UserFeature_ReadValue_ID(
3263 nullptr,
3264 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_WP_SUPPORT_ID,
3265 &userFeatureData,
3266 m_osInterface->pOsContext);
3267 m_weightedPredictionSupported = userFeatureData.i32Data ? true : false;
3268
3269 #if (_DEBUG || _RELEASE_INTERNAL)
3270 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3271 statusKey = MOS_UserFeature_ReadValue_ID(
3272 nullptr,
3273 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
3274 &userFeatureData,
3275 m_osInterface->pOsContext);
3276 m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
3277
3278 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3279 MOS_UserFeature_ReadValue_ID(
3280 nullptr,
3281 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_FORCE_SCALABILITY_ID,
3282 &userFeatureData,
3283 m_osInterface->pOsContext);
3284 m_forceScalability = userFeatureData.i32Data ? true : false;
3285
3286 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3287 statusKey = MOS_UserFeature_ReadValue_ID(
3288 nullptr,
3289 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_DISABLE_PANIC_MODE_ID,
3290 &userFeatureData,
3291 m_osInterface->pOsContext);
3292 if (statusKey == MOS_STATUS_SUCCESS)
3293 {
3294 m_enableFramePanicMode = userFeatureData.i32Data ? false : true;
3295 }
3296
3297 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3298 MOS_UserFeature_ReadValue_ID(
3299 nullptr,
3300 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_INTERVAL_ID,
3301 &userFeatureData,
3302 m_osInterface->pOsContext);
3303 m_ltrInterval = (uint32_t)(userFeatureData.i32Data);
3304
3305 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3306 MOS_UserFeature_ReadValue_ID(
3307 nullptr,
3308 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_DISABLE_ID,
3309 &userFeatureData,
3310 m_osInterface->pOsContext);
3311 m_enableBrcLTR = (userFeatureData.i32Data) ? false : true;
3312 #endif
3313
3314 if (m_codecFunction != CODECHAL_FUNCTION_PAK)
3315 {
3316 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3317 MOS_UserFeature_ReadValue_ID(
3318 nullptr,
3319 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
3320 &userFeatureData,
3321 m_osInterface->pOsContext);
3322 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
3323
3324 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3325 MOS_UserFeature_ReadValue_ID(
3326 nullptr,
3327 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
3328 &userFeatureData,
3329 m_osInterface->pOsContext);
3330 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3331
3332 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3333 MOS_UserFeature_ReadValue_ID(
3334 nullptr,
3335 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID,
3336 &userFeatureData,
3337 m_osInterface->pOsContext);
3338 // Keeping UHME by Default ON for Gen12
3339 m_32xMeSupported = (userFeatureData.i32Data) ? false : true;
3340
3341 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3342 MOS_UserFeature_ReadValue_ID(
3343 nullptr,
3344 __MEDIA_USER_FEATURE_VALUE_HEVC_NUM_THREADS_PER_LCU_ID,
3345 &userFeatureData,
3346 m_osInterface->pOsContext);
3347 m_totalNumThreadsPerLcu = (uint16_t)userFeatureData.i32Data;
3348
3349 if (m_totalNumThreadsPerLcu < m_minThreadsPerLcuB || m_totalNumThreadsPerLcu > m_maxThreadsPerLcuB)
3350 {
3351 return MOS_STATUS_INVALID_PARAMETER;
3352 }
3353 }
3354
3355 if (m_frameWidth < 128 || m_frameHeight < 128)
3356 {
3357 m_16xMeSupported = false;
3358 m_32xMeSupported = false;
3359 }
3360 else if (m_frameWidth < 512 || m_frameHeight < 512)
3361 {
3362 m_32xMeSupported = false;
3363 }
3364
3365 return eStatus;
3366 }
3367
LoadCosts(uint8_t sliceType,uint8_t qp)3368 void CodechalEncHevcStateG12::LoadCosts(uint8_t sliceType, uint8_t qp)
3369 {
3370 if (sliceType >= CODECHAL_HEVC_NUM_SLICE_TYPES)
3371 {
3372 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid slice type");
3373 sliceType = CODECHAL_HEVC_I_SLICE;
3374 }
3375
3376 double qpScale = 0.60;
3377 int32_t qpMinus12 = qp - 12;
3378 double lambda = sqrt(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0));
3379 uint8_t lcuIdx = ((m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3) == 6) ? 1 : 0;
3380 m_lambdaRD = (uint16_t)(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0) * 4 + 0.5);
3381
3382 m_modeCostCre[LUTCREMODE_INTRA_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3383 m_modeCostCre[LUTCREMODE_INTRA_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3384 m_modeCostCre[LUTCREMODE_INTRA_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3385 m_modeCostCre[LUTCREMODE_INTRA_CHROMA] = CRECOST(lambda, LUTMODEBITS_INTRA_CHROMA, lcuIdx, sliceType);
3386 m_modeCostCre[LUTCREMODE_INTER_32X32] = CRECOST(lambda, LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3387 m_modeCostCre[LUTCREMODE_INTER_32X16] = CRECOST(lambda, LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3388 m_modeCostCre[LUTCREMODE_INTER_16X16] = CRECOST(lambda, LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3389 m_modeCostCre[LUTCREMODE_INTER_16X8] = CRECOST(lambda, LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3390 m_modeCostCre[LUTCREMODE_INTER_8X8] = CRECOST(lambda, LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3391 m_modeCostCre[LUTCREMODE_INTER_BIDIR] = CRECOST(lambda, LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3392 m_modeCostCre[LUTCREMODE_INTER_SKIP] = CRECOST(lambda, LUTMODEBITS_INTER_SKIP, lcuIdx, sliceType);
3393 m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3394 m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_16X16, lcuIdx, sliceType);
3395 m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3396 m_modeCostCre[LUTCREMODE_INTRA_NONPRED] = CRECOST(lambda, LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3397
3398 m_modeCostRde[LUTRDEMODE_INTRA_64X64] = RDEBITS62(LUTMODEBITS_INTRA_64X64, lcuIdx, sliceType);
3399 m_modeCostRde[LUTRDEMODE_INTRA_32X32] = RDEBITS62(LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3400 m_modeCostRde[LUTRDEMODE_INTRA_16X16] = RDEBITS62(LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3401 m_modeCostRde[LUTRDEMODE_INTRA_8X8] = RDEBITS62(LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3402 m_modeCostRde[LUTRDEMODE_INTRA_NXN] = RDEBITS62(LUTMODEBITS_INTRA_NXN, lcuIdx, sliceType);
3403 m_modeCostRde[LUTRDEMODE_INTRA_MPM] = RDEBITS62(LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3404 m_modeCostRde[LUTRDEMODE_INTRA_DC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_DC_32X32, lcuIdx, sliceType);
3405 m_modeCostRde[LUTRDEMODE_INTRA_DC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_DC_8X8, lcuIdx, sliceType);
3406 m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3407 m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3408 m_modeCostRde[LUTRDEMODE_INTER_BIDIR] = RDEBITS62(LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3409 m_modeCostRde[LUTRDEMODE_INTER_REFID] = RDEBITS62(LUTMODEBITS_INTER_REFID, lcuIdx, sliceType);
3410 m_modeCostRde[LUTRDEMODE_SKIP_64X64] = RDEBITS62(LUTMODEBITS_SKIP_64X64, lcuIdx, sliceType);
3411 m_modeCostRde[LUTRDEMODE_SKIP_32X32] = RDEBITS62(LUTMODEBITS_SKIP_32X32, lcuIdx, sliceType);
3412 m_modeCostRde[LUTRDEMODE_SKIP_16X16] = RDEBITS62(LUTMODEBITS_SKIP_16X16, lcuIdx, sliceType);
3413 m_modeCostRde[LUTRDEMODE_SKIP_8X8] = RDEBITS62(LUTMODEBITS_SKIP_8X8, lcuIdx, sliceType);
3414 m_modeCostRde[LUTRDEMODE_MERGE_64X64] = RDEBITS62(LUTMODEBITS_MERGE_64X64, lcuIdx, sliceType);
3415 m_modeCostRde[LUTRDEMODE_MERGE_32X32] = RDEBITS62(LUTMODEBITS_MERGE_32X32, lcuIdx, sliceType);
3416 m_modeCostRde[LUTRDEMODE_MERGE_16X16] = RDEBITS62(LUTMODEBITS_MERGE_16X16, lcuIdx, sliceType);
3417 m_modeCostRde[LUTRDEMODE_MERGE_8X8] = RDEBITS62(LUTMODEBITS_MERGE_8X8, lcuIdx, sliceType);
3418 m_modeCostRde[LUTRDEMODE_INTER_32X32] = RDEBITS62(LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3419 m_modeCostRde[LUTRDEMODE_INTER_32X16] = RDEBITS62(LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3420 m_modeCostRde[LUTRDEMODE_INTER_16X16] = RDEBITS62(LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3421 m_modeCostRde[LUTRDEMODE_INTER_16X8] = RDEBITS62(LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3422 m_modeCostRde[LUTRDEMODE_INTER_8X8] = RDEBITS62(LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3423 m_modeCostRde[LUTRDEMODE_TU_DEPTH_0] = RDEBITS62(LUTMODEBITS_TU_DEPTH_0, lcuIdx, sliceType);
3424 m_modeCostRde[LUTRDEMODE_TU_DEPTH_1] = RDEBITS62(LUTMODEBITS_TU_DEPTH_1, lcuIdx, sliceType);
3425
3426 for (uint8_t i = 0; i < 8; i++)
3427 {
3428 m_modeCostRde[LUTRDEMODE_CBF + i] = RDEBITS62(LUTMODEBITS_CBF + i, lcuIdx, sliceType);
3429 }
3430 }
3431
3432 // ------------------------------------------------------------------------------
3433 //| Purpose: Setup curbe for HEVC MbEnc B Kernels
3434 //| Return: N/A
3435 //------------------------------------------------------------------------------
SetCurbeMbEncBKernel()3436 MOS_STATUS CodechalEncHevcStateG12::SetCurbeMbEncBKernel()
3437 {
3438 uint32_t curIdx = m_currRecycledBufIdx;
3439 MOS_LOCK_PARAMS lockFlags;
3440 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3441
3442 uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3; // Map TU 1,4,6 to 0,1,2
3443
3444 // Initialize the CURBE data
3445 MBENC_CURBE curbe;
3446
3447 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3448 {
3449 curbe.QPType = QP_TYPE_CONSTANT;
3450 curbe.ROIEnable = m_hevcPicParams->NumROI ? true : false;
3451 }
3452 else
3453 {
3454 curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
3455 }
3456
3457 // TU based settings
3458 curbe.EnableCu64Check = m_tuSettings[EnableCu64CheckTuParam][tuMapping];
3459 curbe.MaxNumIMESearchCenter = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];
3460 curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping];
3461 curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping];
3462 curbe.Dynamic64Order = m_tuSettings[Dynamic64OrderTuParam][tuMapping];
3463 curbe.DynamicOrderTh = m_tuSettings[DynamicOrderThTuParam][tuMapping];
3464 curbe.Dynamic64Enable = m_tuSettings[Dynamic64EnableTuParam][tuMapping];
3465 curbe.Dynamic64Th = m_tuSettings[Dynamic64ThTuParam][tuMapping];
3466 curbe.IncreaseExitThresh = m_tuSettings[IncreaseExitThreshTuParam][tuMapping];
3467 curbe.IntraSpotCheck = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping];
3468 curbe.Fake32Enable = m_tuSettings[Fake32EnableTuParam][tuMapping];
3469
3470 curbe.FrameWidthInSamples = m_frameWidth;
3471 curbe.FrameHeightInSamples = m_frameHeight;
3472
3473 curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3474 curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
3475 curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
3476 curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3477
3478 curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc;
3479
3480 curbe.TUDepthControl = curbe.MaxTransformDepthInter;
3481
3482 int32_t sliceQp = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3483 curbe.FrameQP = abs(sliceQp);
3484 curbe.FrameQPSign = (sliceQp > 0) ? 0 : 1;
3485
3486 #if 0 // no need in the optimized kernel because kernel does the table look-up
3487 LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp);
3488 curbe.DW4_ModeIntra32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_32X32];
3489 curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32];
3490
3491 curbe.DW5_ModeIntra16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_16X16];
3492 curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16];
3493 curbe.DW5_ModeIntra8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_8X8];
3494 curbe.DW5_ModeIntraNonDC8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8];
3495
3496 curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED];
3497
3498 curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA];
3499
3500 curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM];
3501
3502 curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0];
3503 curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1];
3504
3505 curbe.DW14_IntraTU4x4CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4];
3506 curbe.DW14_IntraTU8x8CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8];
3507 curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16];
3508 curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32];
3509 curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD;
3510 curbe.DW17_IntraNonDC8x8Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8];
3511 curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32];
3512 #endif
3513
3514 curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1;
3515 curbe.NumofRowTile = m_hevcPicParams->num_tile_rows_minus1 + 1;
3516 curbe.HMEFlag = m_hmeSupported ? 3 : 0;
3517
3518 curbe.MaxRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3519 curbe.MaxRefIdxL1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1;
3520 curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3521
3522 // Check whether Last Frame is I frame or not
3523 if (m_frameNum == 0 || m_picHeightInMb == I_TYPE || (m_frameNum && m_lastPictureCodingType == I_TYPE))
3524 {
3525 // This is the flag to notify kernel not to use the history buffer
3526 curbe.LastFrameIsIntra = true;
3527 }
3528 else
3529 {
3530 curbe.LastFrameIsIntra = false;
3531 }
3532
3533 curbe.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
3534 curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
3535 curbe.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag;
3536 curbe.theSameRefList = m_sameRefList;
3537 curbe.IsLowDelay = m_lowDelay;
3538 curbe.MaxNumMergeCand = m_hevcSliceParams->MaxNumMergeCand;
3539 curbe.NumRefIdxL0 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
3540 curbe.NumRefIdxL1 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;
3541
3542 if (m_hevcSeqParams->TargetUsage == 1)
3543 {
3544 // MaxNumMergeCand C Model uses 4 for TU1,
3545 // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3546 curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4);
3547 }
3548 else
3549 {
3550 // MaxNumMergeCand C Model uses 2 for TU4 and TU7,
3551 // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3552 curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2);
3553 }
3554
3555 int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = {0}, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = {0};
3556 curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
3557 curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
3558 curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
3559 curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);
3560
3561 curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
3562 curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
3563 curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
3564 curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);
3565
3566 curbe.RefFrameWinHeight = m_frameHeight;
3567 curbe.RefFrameWinWidth = m_frameWidth;
3568
3569 // Hard coding for now from Gen10HEVC_TU4_default.par
3570 curbe.RoundingInter = (m_roundingInter + 1) << 4; // Should be an input from par(slice state)
3571 curbe.RoundingIntra = (m_roundingIntra + 1) << 4; // Should be an input from par(slice state)
3572 curbe.RDEQuantRoundValue = (m_roundingInter + 1) << 4;
3573
3574 uint32_t gopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
3575 uint32_t gopB = m_hevcSeqParams->GopPicSize - 1 - gopP;
3576
3577 curbe.CostScalingForRA = 1; // default setting
3578
3579 // get the min distance between current pic and ref pics
3580 uint32_t minPocDist = 255;
3581 uint32_t costTableIndex = 0;
3582 if (curbe.CostScalingForRA == 1)
3583 {
3584 for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++)
3585 {
3586 if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist)
3587 minPocDist = abs(tbRefListL0[ref]);
3588 }
3589 for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++)
3590 {
3591 if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist)
3592 minPocDist = abs(tbRefListL1[ref]);
3593 }
3594
3595 if (gopB == 4)
3596 {
3597 if (minPocDist == 1 || minPocDist == 2 || minPocDist == 4)
3598 costTableIndex = minPocDist;
3599 }
3600 if (gopB == 8)
3601 {
3602 if (minPocDist == 1 || minPocDist == 2 || minPocDist == 4 || minPocDist == 8)
3603 costTableIndex = minPocDist + 3;
3604 }
3605 }
3606
3607 curbe.CostTableIndex = costTableIndex;
3608
3609 // the following fields are needed by the new optimized kernel in v052417
3610 curbe.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
3611 curbe.MaxIntraRdeIter = 1;
3612 curbe.CornerNeighborPixel = 0;
3613 curbe.IntraNeighborAvailFlags = 0;
3614 curbe.SubPelMode = 3; // qual-pel search
3615 curbe.InterSADMeasure = 2; // Haar transform
3616 curbe.IntraSADMeasure = 2; // Haar transform
3617 curbe.IntraPrediction = 0; // enable 32x32, 16x16, and 8x8 luma intra prediction
3618 curbe.RefIDCostMode = 1; // 0: AVC and 1: linear method
3619 curbe.TUBasedCostSetting = 0;
3620 curbe.ConcurrentGroupNum = m_numberConcurrentGroup;
3621 curbe.WaveFrontSplitVQFix = ((1 << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)) == 64) ? 1 : 0;
3622 curbe.NumofUnitInWaveFront = m_numWavefrontInOneRegion;
3623 curbe.LoadBalenceEnable = 0; // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe
3624 curbe.ThreadNumber = MOS_MIN(2, m_numberEncKernelSubThread);
3625 curbe.Pic_init_qp_B = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3626 curbe.Pic_init_qp_P = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3627 curbe.Pic_init_qp_I = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3628 curbe.WaveFrontSplitsEnable = (m_numberConcurrentGroup == 1) ? false : true;
3629 curbe.SuperHME = m_16xMeSupported;
3630 curbe.UltraHME = m_32xMeSupported;
3631 curbe.PerBFrameQPOffset = 0;
3632
3633 switch (m_hevcSeqParams->TargetUsage)
3634 {
3635 case 1:
3636 curbe.Degree45 = 0;
3637 curbe.Break12Dependency = 0;
3638 break;
3639 case 4:
3640 default:
3641 curbe.Degree45 = 1;
3642 curbe.Break12Dependency = 1;
3643 break;
3644 }
3645
3646 curbe.LongTermReferenceFlags_L0 = 0;
3647 for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++)
3648 {
3649 curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3650 }
3651 curbe.LongTermReferenceFlags_L1 = 0;
3652 for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++)
3653 {
3654 curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3655 }
3656
3657 curbe.Stepping = 0;
3658 curbe.Cu64SkipCheckOnly = 0;
3659 curbe.Cu642Nx2NCheckOnly = 0;
3660 curbe.EnableCu64AmpCheck = 1;
3661 curbe.IntraSpeedMode = 0; // 35 mode
3662 curbe.DisableIntraNxN = 0;
3663
3664 if (m_hwInterface->GetPlatform().usRevId == 0)
3665 {
3666 curbe.Stepping = 1;
3667 curbe.TUDepthControl = 1;
3668 curbe.MaxTransformDepthInter = 1;
3669 curbe.MaxTransformDepthIntra = 0;
3670 //buf->curbe.EnableCu64Check = 1;
3671 curbe.Cu64SkipCheckOnly = 0;
3672 curbe.Cu642Nx2NCheckOnly = 1;
3673 curbe.EnableCu64AmpCheck = 0;
3674 curbe.IntraSpeedMode = 0; // 35 mode
3675 curbe.DisableIntraNxN = 1;
3676 curbe.MaxNumMergeCand = 1;
3677 }
3678
3679 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3680 lockFlags.WriteOnly = 1;
3681 auto buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
3682 m_osInterface,
3683 &m_encBCombinedBuffer1[curIdx].sResource,
3684 &lockFlags);
3685 CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
3686
3687 if (curbe.Degree45)
3688 {
3689 MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
3690 }
3691 buf->Curbe = curbe;
3692
3693 m_osInterface->pfnUnlockResource(
3694 m_osInterface,
3695 &m_encBCombinedBuffer1[curIdx].sResource);
3696
3697 // clean-up the thread dependency buffer in the second combined buffer
3698 if (m_numberEncKernelSubThread > 1)
3699 {
3700 MOS_LOCK_PARAMS lockFlags;
3701
3702 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3703 lockFlags.WriteOnly = 1;
3704 auto data = (uint8_t *)m_osInterface->pfnLockResource(
3705 m_osInterface,
3706 &m_encBCombinedBuffer2[curIdx].sResource,
3707 &lockFlags);
3708 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3709
3710 MOS_ZeroMemory(&data[m_threadTaskBufferOffset], m_threadTaskBufferSize);
3711
3712 m_osInterface->pfnUnlockResource(
3713 m_osInterface,
3714 &m_encBCombinedBuffer2[curIdx].sResource);
3715 }
3716
3717 if (m_initEncConstTable)
3718 {
3719 // Initialize the Enc Constant Table surface
3720 MOS_LOCK_PARAMS lockFlags;
3721 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3722 lockFlags.WriteOnly = 1;
3723
3724 auto data = (uint8_t *)m_osInterface->pfnLockResource(
3725 m_osInterface,
3726 &m_encConstantTableForB.sResource,
3727 &lockFlags);
3728 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3729
3730 if (m_isMaxLcu64)
3731 {
3732 MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize, (const void *)m_encLcu64ConstantDataLut, sizeof(m_encLcu64ConstantDataLut));
3733 }
3734 else
3735 {
3736 MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize, (const void *)m_encLcu32ConstantDataLut, sizeof(m_encLcu32ConstantDataLut));
3737 }
3738
3739 m_osInterface->pfnUnlockResource(
3740 m_osInterface,
3741 &m_encConstantTableForB.sResource);
3742 m_initEncConstTable = false;
3743 }
3744
3745 // binding table index
3746 MBENC_COMBINED_BTI params;
3747 if (m_isMaxLcu64)
3748 {
3749 for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3750 {
3751 params.BTI_LCU64.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3752 params.BTI_LCU64.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3753 params.BTI_LCU64.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3754 params.BTI_LCU64.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y;
3755 params.BTI_LCU64.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3756 params.BTI_LCU64.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD;
3757 params.BTI_LCU64.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ;
3758 params.BTI_LCU64.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD;
3759 params.BTI_LCU64.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD;
3760 params.BTI_LCU64.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA;
3761 params.BTI_LCU64.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3762 params.BTI_LCU64.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3763 params.BTI_LCU64.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3764 params.BTI_LCU64.VME2XInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
3765 }
3766 params.BTI_LCU64.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE;
3767 params.BTI_LCU64.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1;
3768 params.BTI_LCU64.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2;
3769 params.BTI_LCU64.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3;
3770 params.BTI_LCU64.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3771 params.BTI_LCU64.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3772 }
3773 else
3774 {
3775 for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3776 {
3777 params.BTI_LCU32.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3778 params.BTI_LCU32.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3779 params.BTI_LCU32.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3780 params.BTI_LCU32.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y;
3781 params.BTI_LCU32.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3782 params.BTI_LCU32.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD;
3783 params.BTI_LCU32.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ;
3784 params.BTI_LCU32.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD;
3785 params.BTI_LCU32.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD;
3786 params.BTI_LCU32.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA;
3787 params.BTI_LCU32.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3788 params.BTI_LCU32.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3789 params.BTI_LCU32.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3790 }
3791 params.BTI_LCU32.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE;
3792 params.BTI_LCU32.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1;
3793 params.BTI_LCU32.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2;
3794 params.BTI_LCU32.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3;
3795 params.BTI_LCU32.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3796 params.BTI_LCU32.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3797 }
3798
3799 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
3800 PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
3801 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3802 ¶ms,
3803 kernelState->dwCurbeOffset,
3804 sizeof(params)));
3805
3806 return eStatus;
3807 }
3808
3809 // ------------------------------------------------------------------------------
3810 //| Purpose: Setup curbe for HEVC BrcInitReset Kernel
3811 //| Return: N/A
3812 //------------------------------------------------------------------------------
SetCurbeBrcInitReset(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)3813 MOS_STATUS CodechalEncHevcStateG12::SetCurbeBrcInitReset(
3814 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
3815 {
3816 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3817
3818 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
3819
3820 if (brcKrnIdx != CODECHAL_HEVC_BRC_INIT && brcKrnIdx != CODECHAL_HEVC_BRC_RESET)
3821 {
3822 CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
3823 return MOS_STATUS_INVALID_PARAMETER;
3824 }
3825
3826 // Initialize the CURBE data
3827 BRC_INITRESET_CURBE curbe = m_brcInitResetCurbeInit;
3828
3829 uint32_t profileLevelMaxFrame = GetProfileLevelMaxFrameSize();
3830
3831 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
3832 m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
3833 m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3834 {
3835 if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
3836 {
3837 CODECHAL_ENCODE_ASSERTMESSAGE("Initial VBV Buffer Fullness is zero\n");
3838 return MOS_STATUS_INVALID_PARAMETER;
3839 }
3840
3841 if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
3842 {
3843 CODECHAL_ENCODE_ASSERTMESSAGE("VBV buffer size in bits is zero\n");
3844 return MOS_STATUS_INVALID_PARAMETER;
3845 }
3846 }
3847
3848 curbe.DW0_ProfileLevelMaxFrame = profileLevelMaxFrame;
3849 curbe.DW1_InitBufFull = m_hevcSeqParams->InitVBVBufferFullnessInBit;
3850 curbe.DW2_BufSize = m_hevcSeqParams->VBVBufferSizeInBit;
3851 curbe.DW3_TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; //DDI in Kbits
3852 curbe.DW4_MaximumBitRate = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3853 curbe.DW5_MinimumBitRate = 0;
3854 curbe.DW6_FrameRateM = m_hevcSeqParams->FrameRate.Numerator;
3855 curbe.DW7_FrameRateD = m_hevcSeqParams->FrameRate.Denominator;
3856 curbe.DW8_BRCFlag = BRCINIT_IGNORE_PICTURE_HEADER_SIZE; // always ignore the picture header size set in BRC Update curbe;
3857
3858 if (m_hevcPicParams->NumROI)
3859 {
3860 curbe.DW8_BRCFlag |= BRCINIT_DISABLE_MBBRC; // BRC ROI need disable MBBRC logic in LcuBrc Kernel
3861 }
3862 else
3863 {
3864 curbe.DW8_BRCFlag |= (m_lcuBrcEnabled) ? 0 : BRCINIT_DISABLE_MBBRC;
3865 }
3866
3867 curbe.DW8_BRCFlag |= (m_brcEnabled && m_numPipe > 1) ? BRCINIT_USEHUCBRC : 0;
3868 // For non-ICQ, ACQP Buffer always set to 1
3869 curbe.DW25_ACQPBuffer = 1;
3870 curbe.DW25_SlidingWindowSize = m_slidingWindowSize;
3871
3872 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
3873 {
3874 curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3875 curbe.DW8_BRCFlag |= BRCINIT_ISCBR;
3876 }
3877 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
3878 {
3879 if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3880 {
3881 curbe.DW4_MaximumBitRate = 2 * curbe.DW3_TargetBitRate;
3882 }
3883 curbe.DW8_BRCFlag |= BRCINIT_ISVBR;
3884 }
3885 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3886 {
3887 curbe.DW8_BRCFlag |= BRCINIT_ISAVBR;
3888 // For AVBR, max bitrate = target bitrate,
3889 curbe.DW3_TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; //DDI in Kbits
3890 curbe.DW4_MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3891 }
3892 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
3893 {
3894 curbe.DW8_BRCFlag |= BRCINIT_ISICQ;
3895 curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3896 }
3897 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
3898 {
3899 curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3900 curbe.DW8_BRCFlag |= BRCINIT_ISVCM;
3901 }
3902 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3903 {
3904 curbe.DW8_BRCFlag = BRCINIT_ISCQP;
3905 }
3906 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR)
3907 {
3908 if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3909 {
3910 curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate; // Use max bit rate for HRD compliance
3911 }
3912 curbe.DW8_BRCFlag = curbe.DW8_BRCFlag | BRCINIT_ISQVBR | BRCINIT_ISVBR; // We need to make sure that VBR is used for QP determination.
3913 // use ICQQualityFactor to determine the larger Qp for each MB
3914 curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3915 }
3916 curbe.DW9_FrameWidth = m_oriFrameWidth;
3917 curbe.DW10_FrameHeight = m_oriFrameHeight;
3918 curbe.DW10_AVBRAccuracy = m_usAvbrAccuracy;
3919 curbe.DW11_AVBRConvergence = m_usAvbrConvergence;
3920 curbe.DW12_NumberSlice = m_numSlices;
3921
3922 /**********************************************************************
3923 In case of non-HB/BPyramid Structure
3924 BRC_Param_A = GopP
3925 BRC_Param_B = GopB
3926 In case of HB/BPyramid GOP Structure
3927 BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
3928 BRC Parameters set as follows as per CModel equation
3929 ***********************************************************************/
3930 // BPyramid GOP
3931 if (m_HierchGopBRCEnabled)
3932 {
3933 curbe.DW8_BRCGopP = ((m_hevcSeqParams->GopPicSize + m_hevcSeqParams->GopRefDist - 1) / m_hevcSeqParams->GopRefDist);
3934 curbe.DW9_BRCGopB = curbe.DW8_BRCGopP;
3935 curbe.DW13_BRCGopB1 = curbe.DW8_BRCGopP * 2;
3936 curbe.DW14_BRCGopB2 = ((m_hevcSeqParams->GopPicSize) - (curbe.DW8_BRCGopP) - (curbe.DW13_BRCGopB1) - (curbe.DW9_BRCGopB));
3937 // B1 Level GOP
3938 if (m_hevcSeqParams->GopRefDist <= 4 || curbe.DW14_BRCGopB2 == 0)
3939 {
3940 curbe.DW14_MaxBRCLevel = 3;
3941 }
3942 // B2 Level GOP
3943 else
3944 {
3945 curbe.DW14_MaxBRCLevel = 4;
3946 }
3947 }
3948 // For Regular GOP - No BPyramid
3949 else
3950 {
3951 curbe.DW14_MaxBRCLevel = 1;
3952 curbe.DW8_BRCGopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
3953 curbe.DW9_BRCGopB = m_hevcSeqParams->GopPicSize - 1 - curbe.DW8_BRCGopP;
3954 }
3955
3956 // Set dynamic thresholds
3957 double inputBitsPerFrame = (double)((double)curbe.DW4_MaximumBitRate * (double)curbe.DW7_FrameRateD);
3958 inputBitsPerFrame = (double)(inputBitsPerFrame / curbe.DW6_FrameRateM);
3959
3960 if (curbe.DW2_BufSize < (uint32_t)inputBitsPerFrame * 4)
3961 {
3962 curbe.DW2_BufSize = (uint32_t)inputBitsPerFrame * 4;
3963 }
3964
3965 if (curbe.DW1_InitBufFull == 0)
3966 {
3967 curbe.DW1_InitBufFull = 7 * curbe.DW2_BufSize / 8;
3968 }
3969 if (curbe.DW1_InitBufFull < (uint32_t)(inputBitsPerFrame * 2))
3970 {
3971 curbe.DW1_InitBufFull = (uint32_t)(inputBitsPerFrame * 2);
3972 }
3973 if (curbe.DW1_InitBufFull > curbe.DW2_BufSize)
3974 {
3975 curbe.DW1_InitBufFull = curbe.DW2_BufSize;
3976 }
3977
3978 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3979 {
3980 // For AVBR, Buffer size = 2*Bitrate, InitVBV = 0.75 * BufferSize
3981 curbe.DW2_BufSize = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3982 curbe.DW1_InitBufFull = (uint32_t)(0.75 * curbe.DW2_BufSize);
3983 }
3984
3985 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3986 {
3987 curbe.DW15_LongTermInterval = 0; // no LTR for low delay brc
3988 }
3989 else
3990 {
3991 curbe.DW15_LongTermInterval = (m_enableBrcLTR && m_ltrInterval) ? m_ltrInterval : m_enableBrcLTR ? HEVC_BRC_LONG_TERM_REFRENCE_FLAG : 0;
3992 }
3993
3994 double bpsRatio = ((double)inputBitsPerFrame / ((double)(curbe.DW2_BufSize) / 30));
3995 bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;
3996
3997 curbe.DW19_DeviationThreshold0_PBframe = (uint32_t)(-50 * pow(0.90, bpsRatio));
3998 curbe.DW19_DeviationThreshold1_PBframe = (uint32_t)(-50 * pow(0.66, bpsRatio));
3999 curbe.DW19_DeviationThreshold2_PBframe = (uint32_t)(-50 * pow(0.46, bpsRatio));
4000 curbe.DW19_DeviationThreshold3_PBframe = (uint32_t)(-50 * pow(0.3, bpsRatio));
4001
4002 curbe.DW20_DeviationThreshold4_PBframe = (uint32_t)(50 * pow(0.3, bpsRatio));
4003 curbe.DW20_DeviationThreshold5_PBframe = (uint32_t)(50 * pow(0.46, bpsRatio));
4004 curbe.DW20_DeviationThreshold6_PBframe = (uint32_t)(50 * pow(0.7, bpsRatio));
4005 curbe.DW20_DeviationThreshold7_PBframe = (uint32_t)(50 * pow(0.9, bpsRatio));
4006
4007 curbe.DW21_DeviationThreshold0_VBRcontrol = (uint32_t)(-50 * pow(0.9, bpsRatio));
4008 curbe.DW21_DeviationThreshold1_VBRcontrol = (uint32_t)(-50 * pow(0.7, bpsRatio));
4009 curbe.DW21_DeviationThreshold2_VBRcontrol = (uint32_t)(-50 * pow(0.5, bpsRatio));
4010 curbe.DW21_DeviationThreshold3_VBRcontrol = (uint32_t)(-50 * pow(0.3, bpsRatio));
4011
4012 curbe.DW22_DeviationThreshold4_VBRcontrol = (uint32_t)(100 * pow(0.4, bpsRatio));
4013 curbe.DW22_DeviationThreshold5_VBRcontrol = (uint32_t)(100 * pow(0.5, bpsRatio));
4014 curbe.DW22_DeviationThreshold6_VBRcontrol = (uint32_t)(100 * pow(0.75, bpsRatio));
4015 curbe.DW22_DeviationThreshold7_VBRcontrol = (uint32_t)(100 * pow(0.9, bpsRatio));
4016
4017 curbe.DW23_DeviationThreshold0_Iframe = (uint32_t)(-50 * pow(0.8, bpsRatio));
4018 curbe.DW23_DeviationThreshold1_Iframe = (uint32_t)(-50 * pow(0.6, bpsRatio));
4019 curbe.DW23_DeviationThreshold2_Iframe = (uint32_t)(-50 * pow(0.34, bpsRatio));
4020 curbe.DW23_DeviationThreshold3_Iframe = (uint32_t)(-50 * pow(0.2, bpsRatio));
4021
4022 curbe.DW24_DeviationThreshold4_Iframe = (uint32_t)(50 * pow(0.2, bpsRatio));
4023 curbe.DW24_DeviationThreshold5_Iframe = (uint32_t)(50 * pow(0.4, bpsRatio));
4024 curbe.DW24_DeviationThreshold6_Iframe = (uint32_t)(50 * pow(0.66, bpsRatio));
4025 curbe.DW24_DeviationThreshold7_Iframe = (uint32_t)(50 * pow(0.9, bpsRatio));
4026
4027 if (m_hevcSeqParams->HierarchicalFlag && !m_hevcSeqParams->LowDelayMode &&
4028 (m_hevcSeqParams->GopRefDist == 4 || m_hevcSeqParams->GopRefDist == 8))
4029 {
4030 curbe.DW26_RandomAccess = true;
4031 }
4032 else
4033 {
4034 curbe.DW26_RandomAccess = false;
4035 }
4036
4037 if (m_brcInit)
4038 {
4039 m_dBrcInitCurrentTargetBufFullInBits = curbe.DW1_InitBufFull;
4040 }
4041
4042 m_brcInitResetBufSizeInBits = curbe.DW2_BufSize;
4043 m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;
4044
4045 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
4046 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
4047 &curbe,
4048 kernelState->dwCurbeOffset,
4049 sizeof(curbe)));
4050
4051 return eStatus;
4052 }
4053
4054 // ------------------------------------------------------------------------------
4055 //| Purpose: Setup curbe for HEVC BrcUpdate Kernel
4056 //| Return: N/A
4057 //------------------------------------------------------------------------------
SetCurbeBrcUpdate(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)4058 MOS_STATUS CodechalEncHevcStateG12::SetCurbeBrcUpdate(
4059 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
4060 {
4061 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4062
4063 if (brcKrnIdx != CODECHAL_HEVC_BRC_FRAME_UPDATE && brcKrnIdx != CODECHAL_HEVC_BRC_LCU_UPDATE)
4064 {
4065 CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not frame update or LCU update\n");
4066 return MOS_STATUS_INVALID_PARAMETER;
4067 }
4068
4069 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
4070
4071 // Initialize the CURBE data
4072 BRCUPDATE_CURBE curbe = m_brcUpdateCurbeInit;
4073
4074 curbe.DW5_TargetSize_Flag = 0;
4075
4076 if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits)
4077 {
4078 m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits;
4079 curbe.DW5_TargetSize_Flag = 1;
4080 }
4081
4082 if (m_numSkipFrames)
4083 {
4084 // pass num/size of skipped frames to update BRC
4085 curbe.DW6_NumSkippedFrames = m_numSkipFrames;
4086 curbe.DW15_SizeOfSkippedFrames = m_sizeSkipFrames;
4087
4088 // account for skipped frame in calculating CurrentTargetBufFullInBits
4089 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
4090 }
4091
4092 curbe.DW0_TargetSize = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
4093 curbe.DW1_FrameNumber = m_storeData - 1; // Check if we can remove this (set to 0)
4094
4095 // BRC PAK statistic buffer from last frame, the encoded size includes header already.
4096 // in BRC Initreset kernel, curbe DW8_BRCFlag will always ignore picture header size, so no need to set picture header size here.
4097 curbe.DW2_PictureHeaderSize = 0;
4098 curbe.DW5_CurrFrameBrcLevel = m_currFrameBrcLevel;
4099 curbe.DW5_MaxNumPAKs = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses();
4100
4101 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
4102 {
4103 curbe.DW6_CqpValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4104 }
4105 if (m_hevcPicParams->NumROI)
4106 {
4107 curbe.DW6_ROIEnable = m_brcEnabled ? false : true;
4108 curbe.DW6_BRCROIEnable = m_brcEnabled ? true : false;
4109 curbe.DW6_RoiRatio = CalculateROIRatio();
4110 }
4111 curbe.DW6_SlidingWindowEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
4112
4113 //for low delay brc
4114 curbe.DW6_LowDelayEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW);
4115 curbe.DW16_UserMaxFrameSize = GetProfileLevelMaxFrameSize();
4116 curbe.DW14_ParallelMode = m_hevcSeqParams->ParallelBRC;
4117
4118 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
4119 {
4120 curbe.DW3_StartGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
4121 curbe.DW3_StartGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
4122 curbe.DW4_StartGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
4123 curbe.DW4_StartGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);
4124
4125 curbe.DW11_gRateRatioThreshold0 =
4126 (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40)));
4127 curbe.DW11_gRateRatioThreshold1 =
4128 (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75)));
4129 curbe.DW12_gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97)));
4130 curbe.DW12_gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100)));
4131 curbe.DW12_gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100)));
4132 curbe.DW12_gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100)));
4133 }
4134
4135 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
4136 {
4137 curbe.DW17_LongTerm_Current = 0; // no LTR for low delay brc
4138 }
4139 else
4140 {
4141 m_isFrameLTR = (CodecHal_PictureIsLongTermRef(m_currReconstructedPic));
4142 curbe.DW17_LongTerm_Current = (m_enableBrcLTR && m_isFrameLTR) ? 1 : 0;
4143 }
4144
4145 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
4146 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
4147 &curbe,
4148 kernelState->dwCurbeOffset,
4149 sizeof(curbe)));
4150
4151 return eStatus;
4152 }
4153
SendMbEncSurfacesIKernel(PMOS_COMMAND_BUFFER cmdBuffer)4154 MOS_STATUS CodechalEncHevcStateG12::SendMbEncSurfacesIKernel(
4155 PMOS_COMMAND_BUFFER cmdBuffer)
4156 {
4157 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4158
4159 uint32_t startBTI = 0, mbenc_I_KRNIDX = MBENC_LCU32_KRNIDX;
4160 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4161 PMOS_SURFACE inputSurface = m_rawSurfaceToEnc;
4162 PMHW_KERNEL_STATE kernelState = &m_mbEncKernelStates[mbenc_I_KRNIDX];
4163 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_mbEncKernelBindingTable[mbenc_I_KRNIDX];
4164
4165 // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map
4166 startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
4167 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4168 &surfaceCodecParams,
4169 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4170 m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4171 0,
4172 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4173 bindingTable->dwBindingTableEntries[startBTI++],
4174 false));
4175
4176 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4177 m_hwInterface,
4178 cmdBuffer,
4179 &surfaceCodecParams,
4180 kernelState));
4181
4182 CODECHAL_DEBUG_TOOL(
4183 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4184 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4185 CodechalDbgAttr::attrOutput,
4186 "Hevc_CombinedBuffer1",
4187 m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4188 0,
4189 CODECHAL_MEDIA_STATE_HEVC_I_MBENC)););
4190
4191 // VME surfaces
4192 startBTI = 0;
4193 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4194 &surfaceCodecParams,
4195 inputSurface,
4196 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4197 bindingTable->dwBindingTableEntries[startBTI++]));
4198
4199 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4200 m_hwInterface,
4201 cmdBuffer,
4202 &surfaceCodecParams,
4203 kernelState));
4204
4205 // Programming dummy surfaces even if not used (VME requirement), currently setting to input surface
4206 for (int32_t surface_idx = 0; surface_idx < 8; surface_idx++)
4207 {
4208 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4209 &surfaceCodecParams,
4210 inputSurface,
4211 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4212 bindingTable->dwBindingTableEntries[startBTI++]));
4213
4214 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4215 m_hwInterface,
4216 cmdBuffer,
4217 &surfaceCodecParams,
4218 kernelState));
4219 }
4220
4221 //Source Y and UV
4222 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4223 &surfaceCodecParams,
4224 inputSurface,
4225 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4226 bindingTable->dwBindingTableEntries[startBTI++],
4227 m_verticalLineStride,
4228 false));
4229
4230 surfaceCodecParams.bUseUVPlane = true;
4231
4232 surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI++];
4233 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4234 m_hwInterface,
4235 cmdBuffer,
4236 &surfaceCodecParams,
4237 kernelState));
4238
4239 CODECHAL_DEBUG_TOOL(
4240 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4241 inputSurface,
4242 CodechalDbgAttr::attrEncodeRawInputSurface,
4243 "MbEnc_Input_SrcSurf")));
4244 // Current Y with reconstructed boundary pixels
4245 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4246 &surfaceCodecParams,
4247 &m_currPicWithReconBoundaryPix,
4248 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4249 bindingTable->dwBindingTableEntries[startBTI++],
4250 m_verticalLineStride,
4251 true));
4252
4253 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4254 m_hwInterface,
4255 cmdBuffer,
4256 &surfaceCodecParams,
4257 kernelState));
4258
4259 // Enc CU Record
4260 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4261 &surfaceCodecParams,
4262 &m_intermediateCuRecordSurfaceLcu32,
4263 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4264 bindingTable->dwBindingTableEntries[startBTI++],
4265 m_verticalLineStride,
4266 true));
4267
4268 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4269 m_hwInterface,
4270 cmdBuffer,
4271 &surfaceCodecParams,
4272 kernelState));
4273
4274 // PAK object command surface
4275 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4276 &surfaceCodecParams,
4277 &m_resMbCodeSurface,
4278 m_mvOffset,
4279 0,
4280 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4281 bindingTable->dwBindingTableEntries[startBTI++],
4282 true));
4283
4284 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4285 m_hwInterface,
4286 cmdBuffer,
4287 &surfaceCodecParams,
4288 kernelState));
4289
4290 // CU packet for PAK surface
4291 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4292 &surfaceCodecParams,
4293 &m_resMbCodeSurface,
4294 m_mbCodeSize - m_mvOffset,
4295 m_mvOffset,
4296 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4297 bindingTable->dwBindingTableEntries[startBTI++],
4298 true));
4299
4300 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4301 m_hwInterface,
4302 cmdBuffer,
4303 &surfaceCodecParams,
4304 kernelState));
4305
4306 //Software scoreboard surface
4307 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4308 &surfaceCodecParams,
4309 m_swScoreboardState->GetCurSwScoreboardSurface(),
4310 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4311 bindingTable->dwBindingTableEntries[startBTI++],
4312 m_verticalLineStride,
4313 true));
4314
4315 surfaceCodecParams.bUse32UINTSurfaceFormat = true;
4316 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4317 m_hwInterface,
4318 cmdBuffer,
4319 &surfaceCodecParams,
4320 kernelState));
4321
4322 // Scratch surface for Internal Use Only
4323 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4324 &surfaceCodecParams,
4325 &m_scratchSurface,
4326 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4327 bindingTable->dwBindingTableEntries[startBTI++],
4328 m_verticalLineStride,
4329 true));
4330
4331 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4332 m_hwInterface,
4333 cmdBuffer,
4334 &surfaceCodecParams,
4335 kernelState));
4336
4337 // CU 16x16 QP data input surface
4338 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4339 &surfaceCodecParams,
4340 &m_brcBuffers.sBrcMbQpBuffer,
4341 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4342 bindingTable->dwBindingTableEntries[startBTI++],
4343 m_verticalLineStride,
4344 false));
4345
4346 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4347 m_hwInterface,
4348 cmdBuffer,
4349 &surfaceCodecParams,
4350 kernelState));
4351
4352 // Lcu level data input
4353 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4354 &surfaceCodecParams,
4355 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
4356 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4357 bindingTable->dwBindingTableEntries[startBTI++],
4358 m_verticalLineStride,
4359 false));
4360
4361 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4362 m_hwInterface,
4363 cmdBuffer,
4364 &surfaceCodecParams,
4365 kernelState));
4366
4367 // Enc I Constant Table surface // CostLUT Buf
4368 startBTI = MBENC_I_FRAME_ENC_CONST_TABLE;
4369 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4370 &surfaceCodecParams,
4371 &m_encConstantTableForB.sResource,
4372 m_encConstantTableForB.dwSize,
4373 0,
4374 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4375 bindingTable->dwBindingTableEntries[startBTI++],
4376 false));
4377
4378 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4379 m_hwInterface,
4380 cmdBuffer,
4381 &surfaceCodecParams,
4382 kernelState));
4383
4384 #if 0
4385 // Concurrent Thread Group Data surface
4386 startBTI = MBENC_I_FRAME_CONCURRENT_TG_DATA;
4387 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4388 &surfaceCodecParams,
4389 &resConcurrentThreadGroupData.sResource,
4390 resConcurrentThreadGroupData.dwSize,
4391 0,
4392 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4393 bindingTable->dwBindingTableEntries[startBTI++],
4394 false));
4395
4396 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4397 m_hwInterface,
4398 cmdBuffer,
4399 &surfaceCodecParams,
4400 kernelState));
4401 #endif
4402
4403 // Brc Combined Enc parameter surface
4404 startBTI = MBENC_I_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
4405 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4406 &surfaceCodecParams,
4407 &m_brcInputForEncKernelBuffer->sResource,
4408 HEVC_FRAMEBRC_BUF_CONST_SIZE,
4409 0,
4410 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4411 bindingTable->dwBindingTableEntries[startBTI++],
4412 false));
4413
4414 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4415 m_hwInterface,
4416 cmdBuffer,
4417 &surfaceCodecParams,
4418 kernelState));
4419
4420 // Kernel debug surface
4421 startBTI = MBENC_I_FRAME_DEBUG_DUMP;
4422 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4423 &surfaceCodecParams,
4424 &m_debugSurface[0].sResource,
4425 m_debugSurface[0].dwSize,
4426 0,
4427 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4428 bindingTable->dwBindingTableEntries[startBTI++],
4429 false));
4430
4431 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4432 m_hwInterface,
4433 cmdBuffer,
4434 &surfaceCodecParams,
4435 kernelState));
4436
4437 return eStatus;
4438 }
4439
SendMbEncSurfacesBKernel(PMOS_COMMAND_BUFFER cmdBuffer)4440 MOS_STATUS CodechalEncHevcStateG12::SendMbEncSurfacesBKernel(
4441 PMOS_COMMAND_BUFFER cmdBuffer)
4442 {
4443 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4444
4445 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
4446 PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
4447
4448 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
4449 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = m_isMaxLcu64 ? &m_mbEncKernelBindingTable[MBENC_LCU64_KRNIDX] : &m_mbEncKernelBindingTable[MBENC_LCU32_KRNIDX];
4450
4451 PMOS_SURFACE inputSurface = m_rawSurfaceToEnc;
4452 uint32_t startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
4453 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4454
4455 // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map
4456 startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
4457 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4458 &surfaceCodecParams,
4459 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4460 m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4461 0,
4462 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4463 bindingTable->dwBindingTableEntries[startBTI++],
4464 false));
4465
4466 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4467 m_hwInterface,
4468 cmdBuffer,
4469 &surfaceCodecParams,
4470 kernelState));
4471
4472 CODECHAL_DEBUG_TOOL(
4473 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4474 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
4475 CodechalDbgAttr::attrOutput,
4476 "Hevc_CombinedBuffer1",
4477 m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
4478 0,
4479 CODECHAL_MEDIA_STATE_HEVC_B_MBENC)););
4480 // Combined 1D buffer 2, which contains non fixed sizes of buffers
4481 startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
4482 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4483 &surfaceCodecParams,
4484 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
4485 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
4486 0,
4487 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4488 bindingTable->dwBindingTableEntries[startBTI++],
4489 false));
4490 surfaceCodecParams.bRawSurface = true;
4491
4492 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4493 m_hwInterface,
4494 cmdBuffer,
4495 &surfaceCodecParams,
4496 kernelState));
4497
4498 CODECHAL_DEBUG_TOOL(
4499 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4500 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
4501 CodechalDbgAttr::attrOutput,
4502 "Hevc_CombinedBuffer2",
4503 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
4504 0,
4505 CODECHAL_MEDIA_STATE_HEVC_B_MBENC)););
4506 // VME surfaces
4507 startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
4508 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4509 &surfaceCodecParams,
4510 inputSurface,
4511 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4512 bindingTable->dwBindingTableEntries[startBTI++]));
4513
4514 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4515 m_hwInterface,
4516 cmdBuffer,
4517 &surfaceCodecParams,
4518 kernelState));
4519
4520 for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
4521 {
4522 int32_t ll = 0;
4523 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4524 if (!CodecHal_PictureIsInvalid(refPic) &&
4525 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4526 {
4527 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4528 PMOS_SURFACE refSurfacePtr;
4529 if (surface_idx == 0 && m_useWeightedSurfaceForL0)
4530 {
4531 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx);
4532 }
4533 else
4534 {
4535 refSurfacePtr = &m_refList[idx]->sRefBuffer;
4536 }
4537
4538 // Picture Y VME
4539 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4540 &surfaceCodecParams,
4541 refSurfacePtr,
4542 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4543 bindingTable->dwBindingTableEntries[startBTI++]));
4544
4545 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4546 m_hwInterface,
4547 cmdBuffer,
4548 &surfaceCodecParams,
4549 kernelState));
4550
4551 CODECHAL_DEBUG_TOOL(
4552 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4553 std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4554 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4555 &m_refList[idx]->sRefBuffer,
4556 CodechalDbgAttr::attrReferenceSurfaces,
4557 refSurfName.data())));
4558 }
4559 else
4560 {
4561 // Providing Dummy surface as per VME requirement.
4562 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4563 &surfaceCodecParams,
4564 inputSurface,
4565 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4566 bindingTable->dwBindingTableEntries[startBTI++]));
4567
4568 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4569 m_hwInterface,
4570 cmdBuffer,
4571 &surfaceCodecParams,
4572 kernelState));
4573 }
4574
4575 ll = 1;
4576 refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4577 if (!CodecHal_PictureIsInvalid(refPic) &&
4578 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4579 {
4580 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4581 PMOS_SURFACE refSurfacePtr;
4582 if (surface_idx == 0 && m_useWeightedSurfaceForL1)
4583 {
4584 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx);
4585 }
4586 else
4587 {
4588 refSurfacePtr = &m_refList[idx]->sRefBuffer;
4589 }
4590
4591 // Picture Y VME
4592 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4593 &surfaceCodecParams,
4594 refSurfacePtr,
4595 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4596 bindingTable->dwBindingTableEntries[startBTI++]));
4597
4598 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4599 m_hwInterface,
4600 cmdBuffer,
4601 &surfaceCodecParams,
4602 kernelState));
4603
4604 CODECHAL_DEBUG_TOOL(
4605 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4606 std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4607 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4608 &m_refList[idx]->sRefBuffer,
4609 CodechalDbgAttr::attrReferenceSurfaces,
4610 refSurfName.data())));
4611 }
4612 else
4613 {
4614 // Providing Dummy surface as per VME requirement.
4615 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4616 &surfaceCodecParams,
4617 inputSurface,
4618 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4619 bindingTable->dwBindingTableEntries[startBTI++]));
4620
4621 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4622 m_hwInterface,
4623 cmdBuffer,
4624 &surfaceCodecParams,
4625 kernelState));
4626 }
4627 }
4628
4629 //Source Y and UV
4630 startBTI = MBENC_B_FRAME_CURR_Y;
4631 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4632 &surfaceCodecParams,
4633 inputSurface,
4634 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4635 bindingTable->dwBindingTableEntries[startBTI++],
4636 m_verticalLineStride,
4637 false));
4638
4639 surfaceCodecParams.bUseUVPlane = true;
4640
4641 surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI];
4642 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4643 m_hwInterface,
4644 cmdBuffer,
4645 &surfaceCodecParams,
4646 kernelState));
4647
4648 CODECHAL_DEBUG_TOOL(
4649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4650 inputSurface,
4651 CodechalDbgAttr::attrEncodeRawInputSurface,
4652 "MbEnc_Input_SrcSurf")));
4653
4654 // Current Y with reconstructed boundary pixels
4655 startBTI = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
4656 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4657 &surfaceCodecParams,
4658 &m_currPicWithReconBoundaryPix,
4659 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4660 bindingTable->dwBindingTableEntries[startBTI],
4661 m_verticalLineStride,
4662 true));
4663
4664 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4665 m_hwInterface,
4666 cmdBuffer,
4667 &surfaceCodecParams,
4668 kernelState));
4669
4670 // Enc CU Record
4671 startBTI = MBENC_B_FRAME_ENC_CU_RECORD;
4672 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4673 &surfaceCodecParams,
4674 &m_intermediateCuRecordSurfaceLcu32,
4675 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4676 bindingTable->dwBindingTableEntries[startBTI],
4677 0,
4678 true));
4679
4680 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4681 m_hwInterface,
4682 cmdBuffer,
4683 &surfaceCodecParams,
4684 kernelState));
4685
4686 // PAK object command surface
4687 startBTI = MBENC_B_FRAME_PAK_OBJ;
4688 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4689 &surfaceCodecParams,
4690 &m_resMbCodeSurface,
4691 m_mvOffset,
4692 0,
4693 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4694 bindingTable->dwBindingTableEntries[startBTI],
4695 true));
4696
4697 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4698 m_hwInterface,
4699 cmdBuffer,
4700 &surfaceCodecParams,
4701 kernelState));
4702
4703 // CU packet for PAK surface
4704 startBTI = MBENC_B_FRAME_PAK_CU_RECORD;
4705 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4706 &surfaceCodecParams,
4707 &m_resMbCodeSurface,
4708 m_mbCodeSize - m_mvOffset,
4709 m_mvOffset,
4710 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4711 bindingTable->dwBindingTableEntries[startBTI],
4712 true));
4713
4714 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4715 m_hwInterface,
4716 cmdBuffer,
4717 &surfaceCodecParams,
4718 kernelState));
4719
4720 //Software scoreboard surface
4721 startBTI = MBENC_B_FRAME_SW_SCOREBOARD;
4722 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4723 &surfaceCodecParams,
4724 m_swScoreboardState->GetCurSwScoreboardSurface(),
4725 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4726 bindingTable->dwBindingTableEntries[startBTI],
4727 m_verticalLineStride,
4728 true));
4729
4730 surfaceCodecParams.bUse32UINTSurfaceFormat = true;
4731 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4732 m_hwInterface,
4733 cmdBuffer,
4734 &surfaceCodecParams,
4735 kernelState));
4736
4737 // Scratch surface for Internal Use Only
4738 startBTI = MBENC_B_FRAME_SCRATCH_SURFACE;
4739 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4740 &surfaceCodecParams,
4741 &m_scratchSurface,
4742 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4743 bindingTable->dwBindingTableEntries[startBTI],
4744 m_verticalLineStride,
4745 true));
4746
4747 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4748 m_hwInterface,
4749 cmdBuffer,
4750 &surfaceCodecParams,
4751 kernelState));
4752
4753 // CU 16x16 QP data input surface
4754 startBTI = MBENC_B_FRAME_CU_QP_DATA;
4755 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4756 &surfaceCodecParams,
4757 &m_brcBuffers.sBrcMbQpBuffer,
4758 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4759 bindingTable->dwBindingTableEntries[startBTI],
4760 m_verticalLineStride,
4761 false));
4762
4763 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4764 m_hwInterface,
4765 cmdBuffer,
4766 &surfaceCodecParams,
4767 kernelState));
4768
4769 // Lcu level data input
4770 startBTI = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
4771 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4772 &surfaceCodecParams,
4773 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
4774 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4775 bindingTable->dwBindingTableEntries[startBTI],
4776 m_verticalLineStride,
4777 false));
4778
4779 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4780 m_hwInterface,
4781 cmdBuffer,
4782 &surfaceCodecParams,
4783 kernelState));
4784
4785 // Enc B 32x32 Constant Table surface
4786 startBTI = MBENC_B_FRAME_ENC_CONST_TABLE;
4787 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4788 &surfaceCodecParams,
4789 &m_encConstantTableForB.sResource,
4790 m_encConstantTableForB.dwSize,
4791 0,
4792 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4793 bindingTable->dwBindingTableEntries[startBTI],
4794 false));
4795
4796 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4797 m_hwInterface,
4798 cmdBuffer,
4799 &surfaceCodecParams,
4800 kernelState));
4801
4802 // Colocated CU Motion Vector Data surface
4803 startBTI = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
4804 uint8_t mbCodeIdxForTempMVP = 0xFF;
4805 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
4806 {
4807 uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
4808
4809 mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
4810 }
4811
4812 if (m_pictureCodingType == I_TYPE)
4813 {
4814 // No temporal MVP in the I frame
4815 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4816 }
4817 else
4818 {
4819 if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
4820 {
4821 // Temporal reference MV index is invalid and so disable the temporal MVP
4822 CODECHAL_ENCODE_ASSERT(false);
4823 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4824 }
4825 }
4826
4827 if (mbCodeIdxForTempMVP == 0xFF)
4828 {
4829 startBTI++;
4830 }
4831 else
4832 {
4833 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4834 &surfaceCodecParams,
4835 m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP),
4836 m_sizeOfMvTemporalBuffer,
4837 0,
4838 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
4839 bindingTable->dwBindingTableEntries[startBTI++],
4840 false));
4841
4842 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4843 m_hwInterface,
4844 cmdBuffer,
4845 &surfaceCodecParams,
4846 kernelState));
4847 }
4848
4849 startBTI = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
4850
4851 // HME motion predictor data
4852 if (m_hmeEnabled)
4853 {
4854 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4855 &surfaceCodecParams,
4856 m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer),
4857 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
4858 bindingTable->dwBindingTableEntries[startBTI++],
4859 m_verticalLineStride,
4860 false));
4861
4862 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4863 m_hwInterface,
4864 cmdBuffer,
4865 &surfaceCodecParams,
4866 kernelState));
4867 }
4868 else
4869 {
4870 startBTI++;
4871 }
4872
4873 // Brc Combined Enc parameter surface
4874 startBTI = MBENC_B_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
4875 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4876 &surfaceCodecParams,
4877 &m_brcInputForEncKernelBuffer->sResource,
4878 HEVC_FRAMEBRC_BUF_CONST_SIZE,
4879 0,
4880 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4881 bindingTable->dwBindingTableEntries[startBTI++],
4882 false));
4883
4884 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4885 m_hwInterface,
4886 cmdBuffer,
4887 &surfaceCodecParams,
4888 kernelState));
4889
4890 startBTI = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
4891 if (m_isMaxLcu64)
4892 {
4893 PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4894
4895 //VME 2X Inter prediction surface for current frame
4896 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4897 &surfaceCodecParams,
4898 currScaledSurface2x,
4899 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4900 bindingTable->dwBindingTableEntries[startBTI++]));
4901
4902 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4903 m_hwInterface,
4904 cmdBuffer,
4905 &surfaceCodecParams,
4906 kernelState));
4907
4908 CODECHAL_DEBUG_TOOL(
4909 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4910 currScaledSurface2x,
4911 CodechalDbgAttr::attrReferenceSurfaces,
4912 "2xScaledSurf")));
4913
4914 // RefFrame's 2x DS surface
4915 for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
4916 {
4917 int32_t ll = 0;
4918 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4919 if (!CodecHal_PictureIsInvalid(refPic) &&
4920 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4921 {
4922 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4923
4924 // Picture Y VME
4925 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4926 &surfaceCodecParams,
4927 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4928 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4929 bindingTable->dwBindingTableEntries[startBTI++]));
4930
4931 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4932 m_hwInterface,
4933 cmdBuffer,
4934 &surfaceCodecParams,
4935 kernelState));
4936
4937 CODECHAL_DEBUG_TOOL(
4938 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4939 std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4940 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4941 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4942 CodechalDbgAttr::attrReferenceSurfaces,
4943 refSurfName.data())));
4944 }
4945 else
4946 {
4947 // Providing Dummy surface as per VME requirement.
4948 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4949 &surfaceCodecParams,
4950 currScaledSurface2x,
4951 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4952 bindingTable->dwBindingTableEntries[startBTI++]));
4953
4954 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4955 m_hwInterface,
4956 cmdBuffer,
4957 &surfaceCodecParams,
4958 kernelState));
4959 }
4960
4961 ll = 1;
4962 refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4963 if (!CodecHal_PictureIsInvalid(refPic) &&
4964 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4965 {
4966 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4967
4968 // Picture Y VME
4969 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4970 &surfaceCodecParams,
4971 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4972 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4973 bindingTable->dwBindingTableEntries[startBTI++]));
4974
4975 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4976 m_hwInterface,
4977 cmdBuffer,
4978 &surfaceCodecParams,
4979 kernelState));
4980
4981 CODECHAL_DEBUG_TOOL(
4982 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4983 std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
4984 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4985 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4986 CodechalDbgAttr::attrReferenceSurfaces,
4987 refSurfName.data())));
4988 }
4989 else
4990 {
4991 // Providing Dummy surface as per VME requirement.
4992 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4993 &surfaceCodecParams,
4994 currScaledSurface2x,
4995 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4996 bindingTable->dwBindingTableEntries[startBTI++]));
4997
4998 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4999 m_hwInterface,
5000 cmdBuffer,
5001 &surfaceCodecParams,
5002 kernelState));
5003 }
5004 }
5005 }
5006
5007 // Encoder History Input Buffer
5008 startBTI = MBENC_B_FRAME_ENCODER_HISTORY_INPUT_BUFFER;
5009 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5010 &surfaceCodecParams,
5011 &m_encoderHistoryInputBuffer,
5012 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5013 bindingTable->dwBindingTableEntries[startBTI++],
5014 m_verticalLineStride,
5015 true));
5016
5017 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5018 m_hwInterface,
5019 cmdBuffer,
5020 &surfaceCodecParams,
5021 kernelState));
5022
5023 // Encoder History Output Buffer
5024 startBTI = MBENC_B_FRAME_ENCODER_HISTORY_OUTPUT_BUFFER;
5025 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5026 &surfaceCodecParams,
5027 &m_encoderHistoryOutputBuffer,
5028 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5029 bindingTable->dwBindingTableEntries[startBTI++],
5030 m_verticalLineStride,
5031 true));
5032
5033 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5034 m_hwInterface,
5035 cmdBuffer,
5036 &surfaceCodecParams,
5037 kernelState));
5038
5039 // Kernel debug surface
5040 startBTI = MBENC_B_FRAME_DEBUG_SURFACE;
5041 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++, startBTI++)
5042 {
5043 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5044 &surfaceCodecParams,
5045 &m_debugSurface[i].sResource,
5046 m_debugSurface[i].dwSize,
5047 0,
5048 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5049 bindingTable->dwBindingTableEntries[startBTI],
5050 false));
5051
5052 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5053 m_hwInterface,
5054 cmdBuffer,
5055 &surfaceCodecParams,
5056 kernelState));
5057 }
5058
5059 return eStatus;
5060 }
5061
SendBrcInitResetSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,CODECHAL_HEVC_BRC_KRNIDX krnIdx)5062 MOS_STATUS CodechalEncHevcStateG12::SendBrcInitResetSurfaces(
5063 PMOS_COMMAND_BUFFER cmdBuffer,
5064 CODECHAL_HEVC_BRC_KRNIDX krnIdx)
5065 {
5066 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5067
5068 if (krnIdx != CODECHAL_HEVC_BRC_INIT && krnIdx != CODECHAL_HEVC_BRC_RESET)
5069 {
5070 CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
5071 return MOS_STATUS_INVALID_PARAMETER;
5072 }
5073
5074 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[krnIdx];
5075 uint32_t startBti = 0;
5076 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
5077 // BRC History Buffer
5078 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5079 &surfaceCodecParams,
5080 &m_brcBuffers.resBrcHistoryBuffer,
5081 m_brcHistoryBufferSize,
5082 0,
5083 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5084 bindingTable->dwBindingTableEntries[startBti++],
5085 true));
5086
5087 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[krnIdx];
5088 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5089 m_hwInterface,
5090 cmdBuffer,
5091 &surfaceCodecParams,
5092 kernelState));
5093
5094 // BRC Distortion Surface
5095 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5096 &surfaceCodecParams,
5097 m_brcDistortion,
5098 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
5099 bindingTable->dwBindingTableEntries[startBti++],
5100 0,
5101 true));
5102 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5103 m_hwInterface,
5104 cmdBuffer,
5105 &surfaceCodecParams,
5106 kernelState));
5107
5108 return eStatus;
5109 }
5110
SetupBrcConstantTable(PMOS_SURFACE brcConstantData)5111 MOS_STATUS CodechalEncHevcStateG12::SetupBrcConstantTable(
5112 PMOS_SURFACE brcConstantData)
5113 {
5114 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5115
5116 CODECHAL_ENCODE_FUNCTION_ENTER;
5117
5118 MOS_LOCK_PARAMS lockFlags;
5119 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5120 lockFlags.WriteOnly = 1;
5121 uint8_t *outputData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags);
5122 CODECHAL_ENCODE_CHK_NULL_RETURN(outputData);
5123 uint8_t *inputData = (uint8_t *)g_cInit_HEVC_BRC_QP_ADJUST;
5124 uint32_t inputSize = sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
5125 uint32_t outputSize = brcConstantData->dwHeight * brcConstantData->dwPitch;
5126
5127 // 576-byte of Qp adjust table
5128 while ((inputSize >= brcConstantData->dwWidth) && (outputSize >= brcConstantData->dwWidth))
5129 {
5130 MOS_SecureMemcpy(outputData, outputSize, inputData, brcConstantData->dwWidth);
5131 outputData += brcConstantData->dwPitch;
5132 outputSize -= brcConstantData->dwPitch;
5133 inputData += brcConstantData->dwWidth;
5134 inputSize -= brcConstantData->dwWidth;
5135 }
5136 //lambda and mode cost
5137 if (m_isMaxLcu64)
5138 {
5139 inputData = (uint8_t *)m_brcLcu64x64LambdaModeCostInit;
5140 inputSize = sizeof(m_brcLcu64x64LambdaModeCostInit);
5141 }
5142 else
5143 {
5144 inputData = (uint8_t *)m_brcLcu32x32LambdaModeCostInit;
5145 inputSize = sizeof(m_brcLcu32x32LambdaModeCostInit);
5146 }
5147
5148 while ((inputSize >= brcConstantData->dwWidth) && (outputSize >= brcConstantData->dwWidth))
5149 {
5150 MOS_SecureMemcpy(outputData, outputSize, inputData, brcConstantData->dwWidth);
5151 outputData += brcConstantData->dwPitch;
5152 outputSize -= brcConstantData->dwPitch;
5153 inputData += brcConstantData->dwWidth;
5154 inputSize -= brcConstantData->dwWidth;
5155 }
5156
5157 m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);
5158
5159 return eStatus;
5160 }
5161
SendBrcFrameUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)5162 MOS_STATUS CodechalEncHevcStateG12::SendBrcFrameUpdateSurfaces(
5163 PMOS_COMMAND_BUFFER cmdBuffer)
5164 {
5165 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5166
5167 // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
5168 PMOS_RESOURCE brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
5169 MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
5170 mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams;
5171 mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams;
5172 mhwHevcPicState.bUseVDEnc = m_vdencEnabled ? 1 : 0;
5173 mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses();
5174 mhwHevcPicState.sseEnabledInVmeEncode = m_sseEnabled;
5175 mhwHevcPicState.rhodomainRCEnable = m_brcEnabled && (m_numPipe > 1);
5176 mhwHevcPicState.bSAOEnable = m_hevcSeqParams->SAO_enabled_flag ? (m_hevcSliceParams->slice_sao_luma_flag || m_hevcSliceParams->slice_sao_chroma_flag) : 0;
5177 mhwHevcPicState.bTransformSkipEnable = m_hevcPicParams->transform_skip_enabled_flag;
5178 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
5179
5180 PMOS_SURFACE brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
5181 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
5182
5183 uint32_t startBti = 0;
5184 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE];
5185 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_FRAME_UPDATE];
5186 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
5187
5188 // BRC History Buffer
5189 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5190 &surfaceCodecParams,
5191 &m_brcBuffers.resBrcHistoryBuffer,
5192 m_brcHistoryBufferSize,
5193 0,
5194 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5195 bindingTable->dwBindingTableEntries[startBti++],
5196 true));
5197 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5198 m_hwInterface,
5199 cmdBuffer,
5200 &surfaceCodecParams,
5201 kernelState));
5202
5203 // BRC Prev PAK statistics output buffer
5204 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5205 &surfaceCodecParams,
5206 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
5207 m_hevcBrcPakStatisticsSize,
5208 0,
5209 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5210 bindingTable->dwBindingTableEntries[startBti++],
5211 false));
5212 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5213 m_hwInterface,
5214 cmdBuffer,
5215 &surfaceCodecParams,
5216 kernelState));
5217
5218 // BRC HCP_PIC_STATE read
5219 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5220 &surfaceCodecParams,
5221 brcHcpStateReadBuffer,
5222 m_brcBuffers.dwBrcHcpPicStateSize,
5223 0,
5224 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5225 bindingTable->dwBindingTableEntries[startBti++],
5226 false));
5227 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5228 m_hwInterface,
5229 cmdBuffer,
5230 &surfaceCodecParams,
5231 kernelState));
5232
5233 // BRC HCP_PIC_STATE write
5234 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5235 &surfaceCodecParams,
5236 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
5237 m_brcBuffers.dwBrcHcpPicStateSize,
5238 0,
5239 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5240 bindingTable->dwBindingTableEntries[startBti++],
5241 true));
5242 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5243 m_hwInterface,
5244 cmdBuffer,
5245 &surfaceCodecParams,
5246 kernelState));
5247
5248 // Combined ENC-parameter buffer
5249 startBti++;
5250
5251 // BRC Distortion Surface
5252 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5253 &surfaceCodecParams,
5254 m_brcDistortion,
5255 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
5256 bindingTable->dwBindingTableEntries[startBti++],
5257 0,
5258 true));
5259 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5260 m_hwInterface,
5261 cmdBuffer,
5262 &surfaceCodecParams,
5263 kernelState));
5264
5265 // BRC Data Surface
5266 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5267 &surfaceCodecParams,
5268 brcConstantData,
5269 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5270 bindingTable->dwBindingTableEntries[startBti++],
5271 0,
5272 false));
5273 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5274 m_hwInterface,
5275 cmdBuffer,
5276 &surfaceCodecParams,
5277 kernelState));
5278
5279 // Pixel MB Statistics surface
5280 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5281 &surfaceCodecParams,
5282 &m_resMbStatsBuffer,
5283 m_hwInterface->m_avcMbStatBufferSize,
5284 0,
5285 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5286 bindingTable->dwBindingTableEntries[startBti++],
5287 false));
5288 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5289 m_hwInterface,
5290 cmdBuffer,
5291 &surfaceCodecParams,
5292 kernelState));
5293
5294 // Mv and Distortion summation surface
5295 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5296 &surfaceCodecParams,
5297 &m_mvAndDistortionSumSurface.sResource,
5298 m_mvAndDistortionSumSurface.dwSize,
5299 0,
5300 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5301 bindingTable->dwBindingTableEntries[startBti++],
5302 false));
5303 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5304 m_hwInterface,
5305 cmdBuffer,
5306 &surfaceCodecParams,
5307 kernelState));
5308
5309 CODECHAL_DEBUG_TOOL(
5310 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5311 &m_mvAndDistortionSumSurface.sResource,
5312 CodechalDbgAttr::attrInput,
5313 "MvDistSum",
5314 m_mvAndDistortionSumSurface.dwSize,
5315 0,
5316 CODECHAL_MEDIA_STATE_BRC_UPDATE));
5317
5318 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5319 &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
5320 CodechalDbgAttr::attrInput,
5321 "ImgStateRead",
5322 BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
5323 0,
5324 CODECHAL_MEDIA_STATE_BRC_UPDATE));
5325
5326 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5327 &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
5328 CodechalDbgAttr::attrInput,
5329 "ConstData",
5330 CODECHAL_MEDIA_STATE_BRC_UPDATE));
5331
5332 // PAK statistics buffer is only dumped for BrcUpdate kernel input
5333 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5334 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
5335 CodechalDbgAttr::attrInput,
5336 "PakStats",
5337 HEVC_BRC_PAK_STATISTCS_SIZE,
5338 0,
5339 CODECHAL_MEDIA_STATE_BRC_UPDATE));
5340
5341 // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces
5342 if (m_brcDistortion) {
5343 CODECHAL_ENCODE_CHK_STATUS_RETURN(
5344 m_debugInterface->DumpBuffer(
5345 &m_brcDistortion->OsResource,
5346 CodechalDbgAttr::attrInput,
5347 "BrcDist_BeforeFrameBRC",
5348 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
5349 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
5350 CODECHAL_MEDIA_STATE_BRC_UPDATE));
5351 }
5352
5353 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcBuffers.resBrcHistoryBuffer,
5354 CodechalDbgAttr::attrInput,
5355 "HistoryRead_beforeFramBRC",
5356 m_brcHistoryBufferSize,
5357 0,
5358 CODECHAL_MEDIA_STATE_BRC_UPDATE));
5359
5360 if (m_brcBuffers.pMbEncKernelStateInUse) {
5361 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5362 CODECHAL_MEDIA_STATE_BRC_UPDATE,
5363 m_brcBuffers.pMbEncKernelStateInUse));
5364 }
5365
5366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer,
5367 CodechalDbgAttr::attrInput,
5368 "MBStatsSurf",
5369 m_hwInterface->m_avcMbStatBufferSize,
5370 0,
5371 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
5372 return eStatus;
5373 }
5374
SendBrcLcuUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)5375 MOS_STATUS CodechalEncHevcStateG12::SendBrcLcuUpdateSurfaces(
5376 PMOS_COMMAND_BUFFER cmdBuffer)
5377 {
5378 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5379
5380 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE];
5381 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_LCU_UPDATE];
5382 uint32_t startBTI = 0;
5383 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
5384
5385 if (m_brcEnabled)
5386 {
5387 // BRC History Buffer
5388 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5389 &surfaceCodecParams,
5390 &m_brcBuffers.resBrcHistoryBuffer,
5391 m_brcHistoryBufferSize,
5392 0,
5393 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5394 bindingTable->dwBindingTableEntries[startBTI++],
5395 true));
5396 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5397 m_hwInterface,
5398 cmdBuffer,
5399 &surfaceCodecParams,
5400 kernelState));
5401
5402 // BRC Distortion Surface - Intra or Inter
5403 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5404 &surfaceCodecParams,
5405 m_brcDistortion,
5406 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
5407 bindingTable->dwBindingTableEntries[startBTI++],
5408 0,
5409 true));
5410 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5411 m_hwInterface,
5412 cmdBuffer,
5413 &surfaceCodecParams,
5414 kernelState));
5415
5416 // Pixel MB Statistics surface
5417 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5418 &surfaceCodecParams,
5419 &m_resMbStatsBuffer,
5420 m_hwInterface->m_avcMbStatBufferSize,
5421 0,
5422 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5423 bindingTable->dwBindingTableEntries[startBTI++],
5424 false));
5425 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5426 m_hwInterface,
5427 cmdBuffer,
5428 &surfaceCodecParams,
5429 kernelState));
5430 }
5431 else
5432 {
5433 // CQP ROI
5434 startBTI += 3;
5435 }
5436 // MB QP surface
5437 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5438 &surfaceCodecParams,
5439 &m_brcBuffers.sBrcMbQpBuffer,
5440 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
5441 bindingTable->dwBindingTableEntries[startBTI++],
5442 0,
5443 true));
5444 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5445 m_hwInterface,
5446 cmdBuffer,
5447 &surfaceCodecParams,
5448 kernelState));
5449
5450 // ROI surface
5451 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
5452 &surfaceCodecParams,
5453 &m_brcBuffers.sBrcRoiSurface,
5454 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ROI_ENCODE].Value,
5455 bindingTable->dwBindingTableEntries[startBTI++],
5456 0,
5457 false));
5458 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5459 m_hwInterface,
5460 cmdBuffer,
5461 &surfaceCodecParams,
5462 kernelState));
5463
5464 return eStatus;
5465 }
5466
GetCustomDispatchPattern(PMHW_WALKER_PARAMS walkerParams,PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)5467 MOS_STATUS CodechalEncHevcStateG12::GetCustomDispatchPattern(
5468 PMHW_WALKER_PARAMS walkerParams,
5469 PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)
5470 {
5471 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5472
5473 CODECHAL_ENCODE_CHK_NULL_RETURN(walkerParams);
5474 CODECHAL_ENCODE_CHK_NULL_RETURN(walkerCodecParams);
5475
5476 MOS_ZeroMemory(walkerParams, sizeof(*walkerParams));
5477
5478 walkerParams->WalkerMode = (MHW_WALKER_MODE)walkerCodecParams->WalkerMode;
5479
5480 walkerParams->dwLocalLoopExecCount = 0xFFFF; //MAX VALUE
5481 walkerParams->dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE
5482
5483 // the following code is copied from the kernel ULT
5484 uint32_t maxThreadWidth, maxThreadHeight;
5485 uint32_t threadSpaceWidth, threadSpaceHeight, concurGroupNum, threadScaleV;
5486
5487 threadSpaceWidth = walkerCodecParams->dwResolutionX;
5488 threadSpaceHeight = walkerCodecParams->dwResolutionY;
5489 maxThreadWidth = threadSpaceWidth;
5490 maxThreadHeight = threadSpaceHeight;
5491 concurGroupNum = m_numberConcurrentGroup;
5492 threadScaleV = m_numberEncKernelSubThread;
5493
5494 if (concurGroupNum > 1)
5495 {
5496 maxThreadWidth = threadSpaceWidth;
5497 maxThreadHeight = threadSpaceWidth + (threadSpaceWidth + threadSpaceHeight + concurGroupNum - 2) / concurGroupNum;
5498 maxThreadHeight *= threadScaleV;
5499 maxThreadHeight += 1;
5500 }
5501 else
5502 {
5503 threadSpaceHeight *= threadScaleV;
5504 maxThreadHeight *= threadScaleV;
5505 }
5506
5507 uint32_t localLoopExecCount = m_degree45Needed ? (2 * m_numWavefrontInOneRegion + 1) : m_numWavefrontInOneRegion;
5508
5509 eStatus = InitMediaObjectWalker(maxThreadWidth,
5510 maxThreadHeight,
5511 concurGroupNum - 1,
5512 m_swScoreboardState->GetDependencyPattern(),
5513 m_numberEncKernelSubThread - 1,
5514 localLoopExecCount,
5515 *walkerParams);
5516
5517 return eStatus;
5518 }
5519
GenerateLcuLevelData(MOS_SURFACE & lcuLevelInputDataSurfaceParam)5520 MOS_STATUS CodechalEncHevcStateG12::GenerateLcuLevelData(MOS_SURFACE &lcuLevelInputDataSurfaceParam)
5521 {
5522 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5523
5524 CODECHAL_ENCODE_FUNCTION_ENTER;
5525
5526 CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
5527
5528 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5529 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
5530
5531 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5532 uint32_t residual = (1 << shift) - 1;
5533
5534 uint32_t frameWidthInLcu = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
5535 uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
5536
5537 PLCU_LEVEL_DATA *lcuInfo = (PLCU_LEVEL_DATA *)MOS_AllocMemory(sizeof(PLCU_LEVEL_DATA) * frameWidthInLcu);
5538 CODECHAL_ENCODE_CHK_NULL_RETURN(lcuInfo);
5539 for (uint32_t i = 0; i < frameWidthInLcu; i++)
5540 {
5541 lcuInfo[i] = (PLCU_LEVEL_DATA)MOS_AllocMemory(sizeof(LCU_LEVEL_DATA) * frameHeightInLcu);
5542 if (lcuInfo[i] == nullptr)
5543 {
5544 for (uint32_t j = 0; j < i; j++)
5545 {
5546 MOS_FreeMemory(lcuInfo[j]);
5547 }
5548 MOS_FreeMemory(lcuInfo);
5549 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
5550 }
5551 MOS_ZeroMemory(lcuInfo[i], (sizeof(LCU_LEVEL_DATA) * frameHeightInLcu));
5552 }
5553
5554 // Tiling case
5555 if (numTileColumns > 1 || numTileRows > 1)
5556 {
5557 // This assumes that the entire Slice is contained within a Tile
5558 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
5559 {
5560 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
5561 {
5562 uint32_t tileId = tileRow * numTileColumns + tileCol;
5563 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile = m_tileParams[tileId];
5564
5565 uint32_t tileColumnWidth = (currentTile.TileWidthInMinCbMinus1 + 1 + residual) >> shift;
5566 uint32_t tileRowHeight = (currentTile.TileHeightInMinCbMinus1 + 1 + residual) >> shift;
5567
5568 for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
5569 {
5570 bool lastSliceInTile = false, sliceInTile = false;
5571
5572 eStatus = (MOS_STATUS)IsSliceInTile(slcCount,
5573 ¤tTile,
5574 &sliceInTile,
5575 &lastSliceInTile);
5576 if (eStatus != MOS_STATUS_SUCCESS)
5577 {
5578 for (uint32_t i = 0; i < frameWidthInLcu; i++)
5579 {
5580 MOS_FreeMemory(lcuInfo[i]);
5581 }
5582 MOS_FreeMemory(lcuInfo);
5583 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
5584 }
5585
5586 if (!sliceInTile)
5587 {
5588 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
5589 continue;
5590 }
5591
5592 sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address;
5593 uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
5594 uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
5595
5596 for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
5597 {
5598 lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU;
5599 lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index
5600 lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount;
5601 lcuInfo[sliceLcuX][sliceLcuY].TileId = (uint16_t)tileId;
5602 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = (uint16_t)currentTile.TileStartLCUX;
5603 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = (uint16_t)currentTile.TileStartLCUY;
5604 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)(currentTile.TileStartLCUX + tileColumnWidth);
5605 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)(currentTile.TileStartLCUY + tileRowHeight);
5606
5607 sliceLcuX++;
5608
5609 if (sliceLcuX >= currentTile.TileStartLCUX + tileColumnWidth)
5610 {
5611 sliceLcuX = currentTile.TileStartLCUX;
5612 sliceLcuY++;
5613 }
5614 }
5615 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
5616 }
5617 }
5618 }
5619 }
5620 else // non-tiling case
5621 {
5622 for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
5623 {
5624 sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address;
5625 uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
5626 uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
5627
5628 for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
5629 {
5630 lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU;
5631 lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index
5632 lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount;
5633 lcuInfo[sliceLcuX][sliceLcuY].TileId = 0;
5634 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = 0;
5635 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = 0;
5636 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)frameWidthInLcu;
5637 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)frameHeightInLcu;
5638
5639 sliceLcuX++;
5640
5641 if (sliceLcuX >= frameWidthInLcu)
5642 {
5643 sliceLcuX = 0;
5644 sliceLcuY++;
5645 }
5646 }
5647 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
5648 }
5649 }
5650
5651 // Write LCU Info to the surface
5652 if (!Mos_ResourceIsNull(&lcuLevelInputDataSurfaceParam.OsResource))
5653 {
5654 MOS_LOCK_PARAMS lockFlags;
5655 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5656 lockFlags.WriteOnly = 1;
5657 PLCU_LEVEL_DATA lcuLevelData = (PLCU_LEVEL_DATA)m_osInterface->pfnLockResource(
5658 m_osInterface,
5659 &lcuLevelInputDataSurfaceParam.OsResource,
5660 &lockFlags);
5661 if (lcuLevelData == nullptr)
5662 {
5663 for (uint32_t i = 0; i < frameWidthInLcu; i++)
5664 {
5665 MOS_FreeMemory(lcuInfo[i]);
5666 }
5667 MOS_FreeMemory(lcuInfo);
5668 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
5669 }
5670
5671 uint8_t *dataRowStart = (uint8_t *)lcuLevelData;
5672
5673 for (uint32_t sliceLcuY = 0; sliceLcuY < frameHeightInLcu; sliceLcuY++)
5674 {
5675 for (uint32_t sliceLcuX = 0; sliceLcuX < frameWidthInLcu; sliceLcuX++)
5676 {
5677 *(lcuLevelData) = lcuInfo[sliceLcuX][sliceLcuY];
5678
5679 if ((sliceLcuX + 1) == frameWidthInLcu)
5680 {
5681 dataRowStart += lcuLevelInputDataSurfaceParam.dwPitch;
5682 lcuLevelData = (PLCU_LEVEL_DATA)dataRowStart;
5683 }
5684 else
5685 {
5686 lcuLevelData++;
5687 }
5688 }
5689 }
5690
5691 m_osInterface->pfnUnlockResource(
5692 m_osInterface,
5693 &lcuLevelInputDataSurfaceParam.OsResource);
5694 }
5695 else
5696 {
5697 eStatus = MOS_STATUS_NULL_POINTER;
5698 CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
5699 }
5700
5701 // Freeing the temporarily allocated memory
5702 if (lcuInfo)
5703 {
5704 for (uint32_t i = 0; i < frameWidthInLcu; i++)
5705 {
5706 MOS_FreeMemory(lcuInfo[i]);
5707 }
5708 MOS_FreeMemory(lcuInfo);
5709 }
5710 return eStatus;
5711 }
5712
// Helper class to describe quadtree node.
// A node is a square block of m_size x m_size samples whose top-left corner is
// (m_x, m_y). m_size is the CTB size (1 << m_ctbLog2Size) shifted right by the
// node's depth (m_level). Child blocks are stored by value in m_childBlocks.
class QuadTreeNode
{
    friend class QuadTree;

public:
    // ctb         : root node of the CTB this node belongs to (stored, not owned)
    // x, y        : top-left corner of the block
    // level       : depth below the CTB root (0 for the root itself)
    // ctbLog2Size : log2 of the CTB size
    QuadTreeNode(const QuadTreeNode *ctb, uint32_t x, uint32_t y, uint32_t level, uint32_t ctbLog2Size)
        : m_ctb(ctb),
          m_x(x),
          m_y(y),
          m_level(level),
          m_size((1 << ctbLog2Size) >> level),
          m_ctbLog2Size(ctbLog2Size)
    {
    }

protected:
    // Recursively splits this block while it straddles the right or bottom edge
    // of a picWidth x picHeight picture. Each call that splits appends four
    // children, so it is meant to run once per node.
    void Build(uint32_t picWidth, uint32_t picHeight)
    {
        if (!DoesBlockCrossCodedPicture(picWidth, picHeight))
        {
            return;
        }

        CreateCUs();
        for (QuadTreeNode &child : m_childBlocks)
        {
            child.Build(picWidth, picHeight);
        }
    }

    // Appends the four half-size children in the order
    // top-left, top-right, bottom-left, bottom-right.
    void CreateCUs()
    {
        const uint32_t half  = m_size / 2;
        const uint32_t level = m_level + 1;

        m_childBlocks.reserve(m_childBlocks.size() + 4);
        m_childBlocks.emplace_back(m_ctb, m_x, m_y, level, m_ctbLog2Size);
        m_childBlocks.emplace_back(m_ctb, m_x + half, m_y, level, m_ctbLog2Size);
        m_childBlocks.emplace_back(m_ctb, m_x, m_y + half, level, m_ctbLog2Size);
        m_childBlocks.emplace_back(m_ctb, m_x + half, m_y + half, level, m_ctbLog2Size);
    }

    // True when the block starts inside the w x h picture but extends past its
    // right edge, or past its bottom edge.
    bool DoesBlockCrossCodedPicture(uint32_t w, uint32_t h) const
    {
        const bool crossesRightEdge  = (m_x < w) && ((m_x + m_size) > w);
        const bool crossesBottomEdge = (m_y < h) && ((m_y + m_size) > h);
        return crossesRightEdge || crossesBottomEdge;
    }

public:
    const QuadTreeNode *      m_ctb         = nullptr;  // the root of CTB
    const uint32_t            m_x           = 0;        // left coordinate of the block
    const uint32_t            m_y           = 0;        // top coordinate of the block
    const uint32_t            m_level       = 0;        // depth below the CTB root
    const uint32_t            m_size        = 0;        // block edge length
    const uint32_t            m_ctbLog2Size = 0;        // log2 of the CTB size
    std::vector<QuadTreeNode> m_childBlocks = {};       // empty for a leaf
};
5758
5759 class QuadTree : public QuadTreeNode
5760 {
5761 public:
QuadTree(uint32_t x,uint32_t y,uint32_t ctbLog2Size)5762 QuadTree(uint32_t x, uint32_t y, uint32_t ctbLog2Size)
5763 : QuadTreeNode(this, x, y, 0, ctbLog2Size)
5764 {
5765 }
5766
5767 // Build quadtree in the way none of the blocks crosses picture boundary
BuildQuadTree(uint32_t width,uint32_t height)5768 void BuildQuadTree(uint32_t width, uint32_t height)
5769 {
5770 m_picWidth = width;
5771 m_picHeight = height;
5772
5773 Build(width, height);
5774
5775 CUs.reserve(64);
5776 FillCuList(*this, CUs);
5777 }
5778
GetSplitFlags(const QuadTreeNode & blk,HcpPakObjectG12 & pakObj)5779 static void GetSplitFlags(const QuadTreeNode &blk, HcpPakObjectG12 &pakObj)
5780 {
5781 auto idx = [](uint32_t x0, uint32_t y0, uint32_t x, uint32_t y, uint32_t log2CbSize) {
5782 auto const nCbS = (1 << log2CbSize);
5783 return (x - x0) / nCbS + (y - y0) / nCbS * 2;
5784 };
5785
5786 if (blk.m_childBlocks.empty()) // Block doesn't have splits
5787 return;
5788
5789 switch (blk.m_level)
5790 {
5791 case 0:
5792 pakObj.DW1.Split_flag_level0 = 1;
5793 break;
5794
5795 case 1:
5796 {
5797 auto const blkIdx = idx(blk.m_ctb->m_x, blk.m_ctb->m_y, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 1);
5798 pakObj.DW1.Split_flag_level1 |= 1 << blkIdx;
5799 }
5800 break;
5801
5802 case 2:
5803 {
5804 auto const blkIdx1 = idx(blk.m_ctb->m_x, blk.m_ctb->m_y, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 1);
5805 auto const nCbS1 = (1 << (blk.m_ctbLog2Size - 1));
5806 auto const x1 = blk.m_ctb->m_x + nCbS1 * (blkIdx1 % 2);
5807 auto const y1 = blk.m_ctb->m_y + nCbS1 * (blkIdx1 / 2);
5808 auto const blkIdx2 = idx(x1, y1, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 2);
5809 switch (blkIdx1)
5810 {
5811 case 0:
5812 pakObj.DW1.Split_flag_level2_level1part0 |= 1 << blkIdx2;
5813 break;
5814 case 1:
5815 pakObj.DW1.Split_flag_level2_level1part1 |= 1 << blkIdx2;
5816 break;
5817 case 2:
5818 pakObj.DW1.Split_flag_level2_level1part2 |= 1 << blkIdx2;
5819 break;
5820 case 3:
5821 pakObj.DW1.Split_flag_level2_level1part3 |= 1 << blkIdx2;
5822 break;
5823 };
5824 }
5825 break;
5826 }
5827
5828 for_each(blk.m_childBlocks.begin(), blk.m_childBlocks.end(), [&](const QuadTreeNode &blk) { GetSplitFlags(blk, pakObj); });
5829 }
5830
5831 protected:
5832 // Prepare a list of CU inside a coded picure boundary
FillCuList(const QuadTreeNode & cu,std::vector<const QuadTreeNode * > & list)5833 void FillCuList(const QuadTreeNode &cu, std::vector<const QuadTreeNode *> &list)
5834 {
5835 if (cu.m_childBlocks.empty() && ((cu.m_x + cu.m_size) <= m_picWidth) && ((cu.m_y + cu.m_size) <= m_picHeight))
5836 list.push_back(&cu);
5837 else
5838 for_each(cu.m_childBlocks.begin(), cu.m_childBlocks.end(), [&](const QuadTreeNode &blk) { FillCuList(blk, list); });
5839 }
5840
5841 uint32_t m_picWidth = 0;
5842 uint32_t m_picHeight = 0;
5843
5844 public:
5845 std::vector<const QuadTreeNode *> CUs = {};
5846 };
5847
GenerateSkipFrameMbCodeSurface(SkipFrameInfo & skipframeInfo)5848 MOS_STATUS CodechalEncHevcStateG12::GenerateSkipFrameMbCodeSurface(SkipFrameInfo &skipframeInfo)
5849 {
5850 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5851 CODECHAL_ENCODE_FUNCTION_ENTER;
5852
5853 MOS_LOCK_PARAMS lockFlags = {};
5854 lockFlags.WriteOnly = 1;
5855 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &skipframeInfo.m_resMbCodeSkipFrameSurface, &lockFlags);
5856 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
5857 MOS_ZeroMemory(data, m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE);
5858
5859 auto pakObjData = (HcpPakObjectG12 *)data;
5860 auto cuData = (EncodeHevcCuDataG12 *)(data + m_mvOffset);
5861
5862 auto const ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5863 auto const maxNumCuInCtb = (ctbSize / CODECHAL_HEVC_MIN_CU_SIZE) * (ctbSize / CODECHAL_HEVC_MIN_CU_SIZE);
5864 auto const picWidthInCtb = MOS_ROUNDUP_DIVIDE(m_frameWidth, ctbSize);
5865 auto const picHeightInCtb = MOS_ROUNDUP_DIVIDE(m_frameHeight, ctbSize);
5866 CODECHAL_ENCODE_CHK_COND_RETURN(picWidthInCtb <= 0, "Invalid m_frameWidth");
5867 CODECHAL_ENCODE_CHK_COND_RETURN(picHeightInCtb <= 0, "Invalid m_frameHeight");
5868 uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5869 uint32_t * tileColumnsStartPosition{new uint32_t[num_tile_columns]{}};
5870
5871 for (uint32_t i = 0; i < (num_tile_columns); i++)
5872 {
5873 if (m_hevcPicParams->tile_column_width[i] == 0)
5874 {
5875 tileColumnsStartPosition[i] = picWidthInCtb;
5876 break;
5877 }
5878
5879 if (i == 0)
5880 {
5881 tileColumnsStartPosition[i] = m_hevcPicParams->tile_column_width[i];
5882 continue;
5883 }
5884
5885 tileColumnsStartPosition[i] = tileColumnsStartPosition[i - 1] + m_hevcPicParams->tile_column_width[i];
5886 }
5887
5888 // Prepare CTB splits for corner cases:
5889 // Last column
5890 QuadTree lastColumnCtb((picWidthInCtb - 1) * ctbSize, 0, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5891 lastColumnCtb.BuildQuadTree(m_frameWidth, m_frameHeight);
5892
5893 // Last row
5894 QuadTree lastRowCtb(0, (picHeightInCtb - 1) * ctbSize, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5895 lastRowCtb.BuildQuadTree(m_frameWidth, m_frameHeight);
5896
5897 // Right bottom CTB
5898 QuadTree lastColRowCtb((picWidthInCtb - 1) * ctbSize, (picHeightInCtb - 1) * ctbSize, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
5899 lastColRowCtb.BuildQuadTree(m_frameWidth, m_frameHeight);
5900
5901 uint32_t sliceFirstCtbIdx;
5902 uint32_t ctbXAddr;
5903 uint32_t ctbYAddr;
5904 uint32_t nCUs;
5905 uint32_t tileEnd;
5906 uint32_t tileStart;
5907 for (uint32_t slcIdx = 0; slcIdx < m_numSlices; ++slcIdx)
5908 {
5909 sliceFirstCtbIdx = m_hevcSliceParams[slcIdx].slice_segment_address;
5910 tileEnd = 0;
5911 tileStart = 0;
5912 ctbXAddr = sliceFirstCtbIdx % picWidthInCtb;
5913 ctbYAddr = sliceFirstCtbIdx / picWidthInCtb;
5914 for (uint32_t i = 0; i < num_tile_columns; i++)
5915 {
5916 //Determine what tile slice belongs to
5917 if (ctbXAddr < tileColumnsStartPosition[i])
5918 {
5919 tileEnd = tileColumnsStartPosition[i];
5920 tileStart = (i == 0) ? 0 : tileColumnsStartPosition[i - 1];
5921 break;
5922 }
5923 }
5924
5925 for (uint32_t ctbIdxInSlice = 0; ctbIdxInSlice < m_hevcSliceParams[slcIdx].NumLCUsInSlice; ++ctbIdxInSlice, ++pakObjData, ++ctbXAddr)
5926 {
5927 if (ctbXAddr >= tileEnd)
5928 {
5929 ctbYAddr++;
5930 ctbXAddr = tileStart;
5931 }
5932 pakObjData->DW0.Type = 0x03;
5933 pakObjData->DW0.Opcode = 0x27;
5934 pakObjData->DW0.SubOp = 0x21;
5935 pakObjData->DW0.DwordLength = 0x3;
5936 pakObjData->DW2.Current_LCU_X_Addr = ctbXAddr;
5937 pakObjData->DW2.Current_LCU_Y_Addr = ctbYAddr;
5938 pakObjData->DW4.LCUForceZeroCoeff = 1; // Force skip CUs
5939 pakObjData->DW4.Disable_SAO_On_LCU_Flag = 1;
5940
5941 const bool bCtbCrossRightPicBoundary = (ctbXAddr + 1) * ctbSize > m_frameWidth;
5942 const bool bCtbCrossBottomPicBoundary = (ctbYAddr + 1) * ctbSize > m_frameHeight;
5943 const bool bCtbCrossRightBottomPicBoundary = bCtbCrossRightPicBoundary && bCtbCrossBottomPicBoundary;
5944 if (bCtbCrossRightBottomPicBoundary)
5945 {
5946 QuadTree::GetSplitFlags(lastColRowCtb, *pakObjData);
5947 nCUs = lastColRowCtb.CUs.size();
5948 }
5949 else if (bCtbCrossRightPicBoundary)
5950 {
5951 QuadTree::GetSplitFlags(lastColumnCtb, *pakObjData);
5952 nCUs = lastColumnCtb.CUs.size();
5953 }
5954 else if (bCtbCrossBottomPicBoundary)
5955 {
5956 QuadTree::GetSplitFlags(lastRowCtb, *pakObjData);
5957 nCUs = lastRowCtb.CUs.size();
5958 }
5959 else // default case
5960 {
5961 nCUs = 1;
5962 // For regular CTB, CU splits are not needed. All level values are zero
5963 }
5964 pakObjData->DW1.CU_count_minus1 = nCUs - 1;
5965
5966 if (ctbIdxInSlice == (m_hevcSliceParams[slcIdx].NumLCUsInSlice - 1))
5967 {
5968 pakObjData->DW1.LastCtbOfTileFlag = pakObjData->DW1.LastCtbOfSliceFlag = 1;
5969 pakObjData->DW5 = 0x05000000; // Add batch buffer end flag
5970 }
5971
5972 auto CeilLog2 = [](uint32_t x) {
5973 auto l = 0;
5974 while (x > (1U << l)) l++;
5975 return l;
5976 };
5977
5978 // Fill CU records
5979 for (unsigned int cuIdx = 0; cuIdx < nCUs; ++cuIdx, ++cuData)
5980 {
5981 cuData->DW7_CuPredMode = 1; // Inter
5982
5983 // Note that this can work only for B slices.
5984 // If P slice support appears, we need to have the 2nd skipFrameMbCodeSurface
5985 // When panic mode is triggered backwards reference only should be used
5986 cuData->DW7_InterPredIdcMv0 = 0;
5987 cuData->DW7_InterPredIdcMv1 = 0;
5988
5989 if (bCtbCrossRightBottomPicBoundary)
5990 {
5991 cuData->DW7_CuSize = CeilLog2(lastColRowCtb.CUs[cuIdx]->m_size) - 3;
5992 }
5993 else if (bCtbCrossRightPicBoundary)
5994 {
5995 cuData->DW7_CuSize = CeilLog2(lastColumnCtb.CUs[cuIdx]->m_size) - 3;
5996 }
5997 else if (bCtbCrossBottomPicBoundary)
5998 {
5999 cuData->DW7_CuSize = CeilLog2(lastRowCtb.CUs[cuIdx]->m_size) - 3;
6000 }
6001 else
6002 {
6003 cuData->DW7_CuSize = m_hevcSeqParams->log2_max_coding_block_size_minus3;
6004 }
6005
6006 if (cuData->DW7_CuSize == 3) // 64x64
6007 {
6008 cuData->DW5_TuSize = 0xff; // 4 TUs 32x32
6009 cuData->DW6_TuCountMinus1 = 3;
6010 }
6011 else if (cuData->DW7_CuSize == 2) // 32x32
6012 {
6013 cuData->DW5_TuSize = 3; // 1 TU 32x32
6014 }
6015 else if (cuData->DW7_CuSize == 1) // 16x16
6016 {
6017 cuData->DW5_TuSize = 2; // 1 TU 16x16
6018 }
6019 else // 8x8
6020 {
6021 cuData->DW5_TuSize = 1; // 1 TU 8x8
6022 }
6023 }
6024 cuData += (maxNumCuInCtb - nCUs); // Shift to CUs of next CTB
6025
6026
6027 }
6028 }
6029 m_osInterface->pfnUnlockResource(m_osInterface, &skipframeInfo.m_resMbCodeSkipFrameSurface);
6030 delete[] tileColumnsStartPosition;
6031
6032 skipframeInfo.numSlices = m_numSlices;
6033 uint32_t mbCodeSize = m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE;
6034
6035 #if USE_CODECHAL_DEBUG_TOOL
6036 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6037 &skipframeInfo.m_resMbCodeSkipFrameSurface,
6038 CodechalDbgAttr::attrInput,
6039 "SkipFrameSurface",
6040 mbCodeSize,
6041 0,
6042 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6043 #endif
6044
6045 return eStatus;
6046 }
6047
GenerateConcurrentThreadGroupData(MOS_RESOURCE & concurrentThreadGroupData)6048 MOS_STATUS CodechalEncHevcStateG12::GenerateConcurrentThreadGroupData(MOS_RESOURCE &concurrentThreadGroupData)
6049 {
6050 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6051
6052 CODECHAL_ENCODE_FUNCTION_ENTER;
6053
6054 if (!Mos_ResourceIsNull(&concurrentThreadGroupData))
6055 {
6056 MOS_LOCK_PARAMS lockFlags;
6057 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6058 lockFlags.WriteOnly = 1;
6059 PCONCURRENT_THREAD_GROUP_DATA concurrentTgData = (PCONCURRENT_THREAD_GROUP_DATA)m_osInterface->pfnLockResource(
6060 m_osInterface,
6061 &concurrentThreadGroupData,
6062 &lockFlags);
6063 CODECHAL_ENCODE_CHK_NULL_RETURN(concurrentTgData);
6064
6065 MOS_ZeroMemory(concurrentTgData, concurrentThreadGroupData.iSize);
6066
6067 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
6068 uint32_t residual = (1 << shift) - 1;
6069
6070 uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
6071 uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
6072
6073 uint32_t slcCount = 0;
6074 // Currently only using one thread group for each slice. Extend it to multiple soon.
6075 for (uint32_t startLcu = 0; slcCount < m_numSlices; slcCount++, startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice)
6076 {
6077 uint32_t sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address;
6078 uint32_t sliceStartLcux = sliceStartLcu % frameWidthInLCU;
6079 uint32_t sliceStartLcuy = sliceStartLcu / frameWidthInLCU;
6080
6081 uint32_t sliceEndLcu = (uint16_t)(startLcu + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index
6082 uint32_t sliceEndLcux = sliceStartLcu % frameWidthInLCU;
6083 uint32_t sliceEndLcuy = sliceStartLcu / frameWidthInLCU;
6084
6085 concurrentTgData->CurrSliceStartLcuX = (uint16_t)sliceStartLcux;
6086 concurrentTgData->CurrSliceStartLcuY = (uint16_t)sliceStartLcuy;
6087
6088 concurrentTgData->CurrSliceEndLcuX = (uint16_t)sliceEndLcux;
6089 concurrentTgData->CurrSliceEndLcuY = (uint16_t)sliceEndLcuy;
6090
6091 concurrentTgData->CurrTgStartLcuX = (uint16_t)sliceStartLcux;
6092 concurrentTgData->CurrTgStartLcuY = (uint16_t)sliceStartLcuy;
6093
6094 concurrentTgData->CurrTgEndLcuX = (uint16_t)sliceEndLcux;
6095 concurrentTgData->CurrTgEndLcuY = (uint16_t)sliceEndLcuy;
6096 }
6097
6098 m_osInterface->pfnUnlockResource(
6099 m_osInterface,
6100 &concurrentThreadGroupData);
6101 }
6102 else
6103 {
6104 CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
6105 return MOS_STATUS_NULL_POINTER;
6106 }
6107
6108 return eStatus;
6109 }
6110
EncodeMbEncKernel(CODECHAL_MEDIA_STATE_TYPE encFunctionType)6111 MOS_STATUS CodechalEncHevcStateG12::EncodeMbEncKernel(
6112 CODECHAL_MEDIA_STATE_TYPE encFunctionType)
6113 {
6114 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6115
6116 PerfTagSetting perfTag;
6117 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
6118
6119 // Initialize DSH kernel state
6120 PMHW_KERNEL_STATE kernelState;
6121 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6122 CODECHAL_WALKER_DEGREE walkerDegree;
6123 MHW_WALKER_PARAMS walkerParams;
6124 uint32_t walkerResolutionX, walkerResolutionY;
6125 bool customDispatchPattern = true;
6126 uint16_t totalThreadNumPerLcu = 1;
6127
6128 if (m_hevcPicParams->CodingType == I_TYPE)
6129 {
6130 encFunctionType = CODECHAL_MEDIA_STATE_HEVC_I_MBENC;
6131 }
6132 else
6133 {
6134 encFunctionType = m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
6135 }
6136
6137 if (m_isMaxLcu64)
6138 {
6139 kernelState = &m_mbEncKernelStates[MBENC_LCU64_KRNIDX];
6140 if (m_hevcSeqParams->TargetUsage == 1)
6141 {
6142 walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
6143 walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
6144 }
6145 else
6146 {
6147 walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6);
6148 walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6);
6149 }
6150 }
6151 else
6152 {
6153 kernelState = &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
6154 walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
6155 walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
6156 }
6157
6158 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6159 walkerCodecParams.WalkerMode = m_walkerMode;
6160 walkerCodecParams.dwResolutionX = walkerResolutionX;
6161 walkerCodecParams.dwResolutionY = walkerResolutionY;
6162 walkerCodecParams.dwNumSlices = m_numSlices;
6163 walkerCodecParams.usTotalThreadNumPerLcu = totalThreadNumPerLcu;
6164 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCustomDispatchPattern(&walkerParams, &walkerCodecParams));
6165
6166 // If Single Task Phase is not enabled, use BT count for the kernel state.
6167 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6168 {
6169 uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6170 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6171 m_stateHeapInterface,
6172 maxBtCount));
6173 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6174 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6175 }
6176
6177 // Set up the DSH/SSH as normal
6178 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6179 m_stateHeapInterface,
6180 kernelState,
6181 false,
6182 0,
6183 false,
6184 m_storeData));
6185
6186 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6187 MOS_ZeroMemory(&idParams, sizeof(idParams));
6188 idParams.pKernelState = kernelState;
6189 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6190 m_stateHeapInterface,
6191 1,
6192 &idParams));
6193
6194 // Generate Lcu Level Data
6195 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx]));
6196
6197 // Generate Concurrent Thread Group Data
6198 if (m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26Degree ||
6199 m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26ZDegree ||
6200 m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDegree)
6201 {
6202 // Generate Concurrent Thread Group Data
6203 uint32_t curIdx = m_currRecycledBufIdx;
6204
6205 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData(m_encBCombinedBuffer1[curIdx].sResource));
6206 }
6207 else
6208 {
6209 // For 45D walking patter, kernel generates the concurrent thread group by itself. No need for driver to generate.
6210 }
6211
6212 // setup curbe
6213 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncBKernel());
6214
6215 CODECHAL_DEBUG_TOOL(
6216 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6217 encFunctionType,
6218 MHW_DSH_TYPE,
6219 kernelState));
6220
6221 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6222 encFunctionType,
6223 kernelState));
6224 //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHEVCMbEncCurbeG12(
6225 //m_debugInterface,
6226 //encFunctionType,
6227 //&m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource));
6228
6229 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6230 encFunctionType,
6231 MHW_ISH_TYPE,
6232 kernelState));)
6233
6234 MOS_COMMAND_BUFFER cmdBuffer;
6235 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6236
6237 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
6238 sendKernelCmdsParams.EncFunctionType = encFunctionType;
6239 sendKernelCmdsParams.pKernelState = kernelState;
6240 // TO DO : Remove scoreboard from VFE STATE Command
6241 sendKernelCmdsParams.bEnableCustomScoreBoard = false;
6242 sendKernelCmdsParams.pCustomScoreBoard = nullptr;
6243 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6244
6245 // Add binding table
6246 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6247 m_stateHeapInterface,
6248 kernelState));
6249
6250 // send surfaces
6251 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesBKernel(&cmdBuffer));
6252
6253 CODECHAL_DEBUG_TOOL(
6254 if (m_pictureCodingType == I_TYPE) {
6255 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6256 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
6257 CodechalDbgAttr::attrOutput,
6258 "HEVC_I_MBENC_LcuLevelData_In",
6259 CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
6260 } else {
6261 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6262 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
6263 CodechalDbgAttr::attrOutput,
6264 "HEVC_B_MBENC_LcuLevelData_In",
6265 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
6266 })
6267
6268 if ((encFunctionType == CODECHAL_MEDIA_STATE_HEVC_B_MBENC) || (encFunctionType == CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC))
6269 {
6270 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6271 &m_encConstantTableForB.sResource,
6272 "HEVC_B_MBENC_ConstantData_In",
6273 CodechalDbgAttr::attrOutput,
6274 m_encConstantTableForB.dwSize,
6275 0,
6276 encFunctionType)));
6277 }
6278
6279 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
6280 &cmdBuffer,
6281 &walkerParams));
6282
6283 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6284
6285 // Add dump for MBEnc surface state heap here
6286 CODECHAL_DEBUG_TOOL(
6287 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6288 encFunctionType,
6289 MHW_SSH_TYPE,
6290 kernelState));)
6291
6292 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6293 m_stateHeapInterface,
6294 kernelState));
6295
6296 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6297 {
6298 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6299 m_stateHeapInterface));
6300 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6301 &cmdBuffer,
6302 nullptr));
6303 }
6304
6305 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6306 &cmdBuffer,
6307 encFunctionType,
6308 nullptr)));
6309
6310 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6311
6312 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6313
6314 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6315 {
6316 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6317 m_lastTaskInPhase = false;
6318 }
6319
6320 CODECHAL_DEBUG_TOOL(
6321 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6322 &m_debugSurface[0].sResource,
6323 CodechalDbgAttr::attrOutput,
6324 "DebugDataSurface_Out0",
6325 m_debugSurface[0].dwSize,
6326 0,
6327 encFunctionType));
6328 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6329 &m_debugSurface[1].sResource,
6330 CodechalDbgAttr::attrOutput,
6331 "DebugDataSurface_Out1",
6332 m_debugSurface[1].dwSize,
6333 0,
6334 encFunctionType));
6335 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6336 &m_debugSurface[2].sResource,
6337 CodechalDbgAttr::attrOutput,
6338 "DebugDataSurface_Out2",
6339 m_debugSurface[2].dwSize,
6340 0,
6341 encFunctionType));
6342 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6343 &m_debugSurface[3].sResource,
6344 CodechalDbgAttr::attrOutput,
6345 "DebugDataSurface_Out3",
6346 m_debugSurface[3].dwSize,
6347 0,
6348 encFunctionType)););
6349
6350 #if 0 // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them.
6351 {
6352 CODECHAL_DEBUG_TOOL(
6353 CODEC_REF_LIST currRefList;
6354
6355 currRefList = *(pRefList[m_currReconstructedPic.FrameIdx]);
6356 currRefList.RefPic = m_currOriginalPic;
6357
6358 m_debugInterface->CurrPic = m_currOriginalPic;
6359 m_debugInterface->dwBufferDumpFrameNum = m_storeData;
6360 m_debugInterface->wFrameType = m_pictureCodingType;
6361
6362 //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeMbEncMbPakOutput(
6363 // m_debugInterface,
6364 // this,
6365 // &currRefList,
6366 // (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6367 // CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6368 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6369 &currRefList.resRefMbCodeBuffer,
6370 CodechalDbgAttr::attrOutput,
6371 "MbCode",
6372 m_picWidthInMb * m_frameFieldHeightInMb*64,
6373 CodecHal_PictureIsBottomField(currRefList.RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
6374 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6375 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6376
6377 if (m_mvDataSize)
6378 {
6379 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6380 &currRefList.resRefMvDataBuffer,
6381 CodechalDbgAttr::attrOutput,
6382 "MbData",
6383 m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
6384 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
6385 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6386 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6387 }
6388 if (CodecHalIsFeiEncode(m_codecFunction))
6389 {
6390 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6391 &m_resDistortionBuffer,
6392 CodechalDbgAttr::attrOutput,
6393 "DistortionSurf",
6394 m_picWidthInMb * m_frameFieldHeightInMb * 48,
6395 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
6396 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
6397 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
6398 }
6399
6400 )
6401
6402 CODECHAL_DEBUG_TOOL(
6403 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6404 this,
6405 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
6406 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
6407 (const char*)"_Hevc_CombinedBuffer2",
6408 false));
6409 );
6410
6411 // Dump SW scoreboard surface - Output of MBENC
6412 CODECHAL_DEBUG_TOOL(
6413 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHevcEncodeSwScoreboardSurface(
6414 m_debugInterface,
6415 m_swScoreboardState->GetCurSwScoreboardSurface(), false));
6416 );
6417
6418 CODECHAL_DEBUG_TOOL(
6419 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6420 this,
6421 &m_encConstantTableForB.sResource,
6422 m_encConstantTableForB.dwSize,
6423 (const char*)"_Hevc_EncConstantTable",
6424 true));
6425 );
6426
6427 CODECHAL_DEBUG_TOOL(
6428 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6429 this,
6430 &m_debugSurface[0].sResource,
6431 m_debugSurface[0].dwSize,
6432 (const char*)"_Hevc_DebugDump0",
6433 true));
6434 );
6435
6436 CODECHAL_DEBUG_TOOL(
6437 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6438 this,
6439 &m_debugSurface[1].sResource,
6440 m_debugSurface[1].dwSize,
6441 (const char*)"_Hevc_DebugDump1",
6442 true));
6443 );
6444
6445 CODECHAL_DEBUG_TOOL(
6446 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6447 this,
6448 &m_debugSurface[2].sResource,
6449 m_debugSurface[2].dwSize,
6450 (const char*)"_Hevc_DebugDump2",
6451 true));
6452 );
6453
6454 CODECHAL_DEBUG_TOOL(
6455 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
6456 this,
6457 &m_debugSurface[3].sResource,
6458 m_debugSurface[3].dwSize,
6459 (const char*)"_Hevc_DebugDump3",
6460 true));
6461 );
6462
6463 CODECHAL_DEBUG_TOOL(
6464 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6465 &m_currPicWithReconBoundaryPix,
6466 CodechalDbgAttr::attrReconstructedSurface,
6467 "ReconSurf")));
6468 }
6469 #endif
6470
6471 return eStatus;
6472 }
6473
EncodeBrcInitResetKernel()6474 MOS_STATUS CodechalEncHevcStateG12::EncodeBrcInitResetKernel()
6475 {
6476 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6477
6478 CODECHAL_ENCODE_FUNCTION_ENTER;
6479
6480 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
6481
6482 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET;
6483
6484 // Initialize DSH kernel state
6485 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
6486
6487 // If Single Task Phase is not enabled, use BT count for the kernel state.
6488 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6489 {
6490 uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6491 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6492 m_stateHeapInterface,
6493 maxBtCount));
6494 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6495 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6496 }
6497
6498 // Set up the DSH/SSH as normal
6499 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6500 m_stateHeapInterface,
6501 kernelState,
6502 false,
6503 0,
6504 false,
6505 m_storeData));
6506
6507 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6508 MOS_ZeroMemory(&idParams, sizeof(idParams));
6509 idParams.pKernelState = kernelState;
6510 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6511 m_stateHeapInterface,
6512 1,
6513 &idParams));
6514
6515 // Setup curbe for BrcInitReset kernel
6516 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcInitReset(
6517 brcKrnIdx));
6518
6519 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;
6520 CODECHAL_DEBUG_TOOL(
6521 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6522 encFunctionType,
6523 MHW_DSH_TYPE,
6524 kernelState));
6525 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6526 encFunctionType,
6527 kernelState));
6528 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6529 encFunctionType,
6530 MHW_ISH_TYPE,
6531 kernelState));)
6532
6533 MOS_COMMAND_BUFFER cmdBuffer;
6534 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6535
6536 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
6537 sendKernelCmdsParams.EncFunctionType = encFunctionType;
6538 sendKernelCmdsParams.pKernelState = kernelState;
6539 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6540
6541 // Add binding table
6542 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6543 m_stateHeapInterface,
6544 kernelState));
6545
6546 // Send surfaces for BrcInitReset Kernel
6547 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcInitResetSurfaces(&cmdBuffer, brcKrnIdx));
6548
6549 MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
6550 MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
6551
6552 MediaObjectInlineData mediaObjectInlineData;
6553 MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
6554 mediaObjectParams.pInlineData = &mediaObjectInlineData;
6555 mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
6556 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
6557 &cmdBuffer,
6558 nullptr,
6559 &mediaObjectParams));
6560
6561 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6562
6563 // Add dump for BrcInitReset surface state heap here
6564 CODECHAL_DEBUG_TOOL(
6565 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6566 encFunctionType,
6567 MHW_SSH_TYPE,
6568 kernelState));)
6569
6570 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6571 m_stateHeapInterface,
6572 kernelState));
6573
6574 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6575 {
6576 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6577 m_stateHeapInterface));
6578 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6579 &cmdBuffer,
6580 nullptr));
6581 }
6582
6583 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6584 &cmdBuffer,
6585 encFunctionType,
6586 nullptr)));
6587
6588 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6589
6590 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6591
6592 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6593 {
6594 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6595 m_lastTaskInPhase = false;
6596 }
6597
6598 return eStatus;
6599 }
6600
EncodeBrcFrameUpdateKernel()6601 MOS_STATUS CodechalEncHevcStateG12::EncodeBrcFrameUpdateKernel()
6602 {
6603 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6604
6605 CODECHAL_ENCODE_FUNCTION_ENTER;
6606
6607 PerfTagSetting perfTag;
6608 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
6609
6610 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
6611
6612 // Initialize DSH kernel state
6613 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
6614
6615 // If Single Task Phase is not enabled, use BT count for the kernel state.
6616 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6617 {
6618 uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6619 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6620 m_stateHeapInterface,
6621 maxBtCount));
6622 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6623 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6624 }
6625
6626 // Set up the DSH/SSH as normal
6627 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6628 m_stateHeapInterface,
6629 kernelState,
6630 false,
6631 0,
6632 false,
6633 m_storeData));
6634
6635 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6636 MOS_ZeroMemory(&idParams, sizeof(idParams));
6637 idParams.pKernelState = kernelState;
6638 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6639 m_stateHeapInterface,
6640 1,
6641 &idParams));
6642
6643 // Setup curbe for BrcFrameUpdate kernel
6644 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
6645 brcKrnIdx));
6646
6647 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
6648 CODECHAL_DEBUG_TOOL(
6649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6650 encFunctionType,
6651 MHW_DSH_TYPE,
6652 kernelState));
6653 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6654 encFunctionType,
6655 kernelState));
6656 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6657 encFunctionType,
6658 MHW_ISH_TYPE,
6659 kernelState));)
6660
6661 MOS_COMMAND_BUFFER cmdBuffer;
6662 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6663
6664 SendKernelCmdsParams sendKernelCmdsParams;
6665 sendKernelCmdsParams = SendKernelCmdsParams();
6666 sendKernelCmdsParams.EncFunctionType = encFunctionType;
6667 sendKernelCmdsParams.pKernelState = kernelState;
6668 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6669
6670 // Add binding table
6671 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6672 m_stateHeapInterface,
6673 kernelState));
6674
6675 // Send surfaces for BrcFrameUpdate Kernel
6676 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcFrameUpdateSurfaces(&cmdBuffer));
6677
6678 MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
6679 MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
6680
6681 MediaObjectInlineData mediaObjectInlineData;
6682 MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
6683 mediaObjectParams.pInlineData = &mediaObjectInlineData;
6684 mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
6685 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
6686 &cmdBuffer,
6687 nullptr,
6688 &mediaObjectParams));
6689
6690 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6691
6692 // Add dump for BrcFrameUpdate surface state heap here
6693 CODECHAL_DEBUG_TOOL(
6694 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6695 encFunctionType,
6696 MHW_SSH_TYPE,
6697 kernelState));)
6698 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6699 m_stateHeapInterface,
6700 kernelState));
6701
6702 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6703 {
6704 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6705 m_stateHeapInterface));
6706 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6707 &cmdBuffer,
6708 nullptr));
6709 }
6710
6711 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6712 &cmdBuffer,
6713 encFunctionType,
6714 nullptr)));
6715
6716 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6717
6718 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6719
6720 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6721 {
6722 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6723 m_lastTaskInPhase = false;
6724 }
6725
6726 return eStatus;
6727 }
6728
EncodeBrcLcuUpdateKernel()6729 MOS_STATUS CodechalEncHevcStateG12::EncodeBrcLcuUpdateKernel()
6730 {
6731 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6732
6733 CODECHAL_ENCODE_FUNCTION_ENTER;
6734
6735 PerfTagSetting perfTag;
6736 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);
6737
6738 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE;
6739
6740 // Initialize DSH kernel state
6741 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
6742
6743 // If Single Task Phase is not enabled, use BT count for the kernel state.
6744 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
6745 {
6746 uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount;
6747 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
6748 m_stateHeapInterface,
6749 maxBtCount));
6750 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
6751 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
6752 }
6753
6754 // Set up the DSH/SSH as normal
6755 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
6756 m_stateHeapInterface,
6757 kernelState,
6758 false,
6759 0,
6760 false,
6761 m_storeData));
6762
6763 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
6764 MOS_ZeroMemory(&idParams, sizeof(idParams));
6765 idParams.pKernelState = kernelState;
6766 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
6767 m_stateHeapInterface,
6768 1,
6769 &idParams));
6770
6771 // Setup curbe for BrcFrameUpdate kernel
6772 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
6773 brcKrnIdx));
6774
6775 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_MB_BRC_UPDATE;
6776 CODECHAL_DEBUG_TOOL(
6777 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6778 encFunctionType,
6779 MHW_DSH_TYPE,
6780 kernelState));
6781 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
6782 encFunctionType,
6783 kernelState));
6784 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6785 encFunctionType,
6786 MHW_ISH_TYPE,
6787 kernelState));)
6788
6789 MOS_COMMAND_BUFFER cmdBuffer;
6790 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
6791
6792 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
6793 sendKernelCmdsParams.EncFunctionType = encFunctionType;
6794 sendKernelCmdsParams.pKernelState = kernelState;
6795 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
6796
6797 // Add binding table
6798 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
6799 m_stateHeapInterface,
6800 kernelState));
6801
6802 if (m_hevcPicParams->NumROI)
6803 {
6804 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROISurface());
6805 }
6806
6807 // Send surfaces for BrcFrameUpdate Kernel
6808 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcLcuUpdateSurfaces(&cmdBuffer));
6809
6810 // Program Media walker
6811 uint32_t resolutionX, resolutionY;
6812 resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
6813 resolutionX = MOS_ROUNDUP_SHIFT(resolutionX, 4);
6814 resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight);
6815 resolutionY = MOS_ROUNDUP_SHIFT(resolutionY, 3);
6816 CODECHAL_ENCODE_ASSERTMESSAGE("LucBRC thread space = %d x %d", resolutionX, resolutionY);
6817
6818 MHW_WALKER_PARAMS walkerParams;
6819 MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
6820
6821 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
6822 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
6823 walkerCodecParams.WalkerMode = m_walkerMode;
6824 walkerCodecParams.dwResolutionX = resolutionX;
6825 walkerCodecParams.dwResolutionY = resolutionY;
6826 walkerCodecParams.bNoDependency = true;
6827 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
6828 walkerCodecParams.ucGroupId = m_groupId;
6829 walkerCodecParams.wPictureCodingType = m_pictureCodingType;
6830 walkerCodecParams.bUseScoreboard = false;
6831
6832 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
6833 m_hwInterface,
6834 &walkerParams,
6835 &walkerCodecParams));
6836
6837 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
6838 &cmdBuffer,
6839 &walkerParams));
6840
6841 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
6842
6843 // Add dump for BrcFrameUpdate surface state heap here
6844 CODECHAL_DEBUG_TOOL(
6845 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
6846 encFunctionType,
6847 MHW_SSH_TYPE,
6848 kernelState));)
6849
6850 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
6851 m_stateHeapInterface,
6852 kernelState));
6853
6854 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6855 {
6856 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
6857 m_stateHeapInterface));
6858 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
6859 &cmdBuffer,
6860 nullptr));
6861 }
6862
6863 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
6864 &cmdBuffer,
6865 encFunctionType,
6866 nullptr)));
6867
6868 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
6869
6870 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
6871
6872 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
6873 {
6874 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
6875 m_lastTaskInPhase = false;
6876 }
6877
6878 return eStatus;
6879 }
6880
EncodeKernelFunctions()6881 MOS_STATUS CodechalEncHevcStateG12::EncodeKernelFunctions()
6882 {
6883 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6884
6885 if (m_pakOnlyTest)
6886 {
6887 // Skip ENC when PAK only mode is enabled
6888 return eStatus;
6889 }
6890
6891 if (m_pictureCodingType == P_TYPE)
6892 {
6893 m_lowDelay = true;
6894 }
6895
6896 if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
6897 {
6898 m_currRefSync = &m_refSync[m_currMbCodeIdx];
6899
6900 // Check if the signal obj has been used before
6901 if (!m_hevcSeqParams->ParallelBRC && (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed))
6902 {
6903 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
6904 syncParams.GpuContext = m_renderContext;
6905 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
6906 syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount;
6907
6908 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
6909 m_currRefSync->uiSemaphoreObjCount = 0;
6910 m_currRefSync->bInUsed = false;
6911 }
6912 }
6913 else
6914 {
6915 m_currRefSync = nullptr;
6916 }
6917
6918 //Reset to use a different performance tag ID
6919 m_osInterface->pfnResetPerfBufferID(m_osInterface);
6920
6921 m_firstTaskInPhase = true;
6922 m_lastTaskInPhase = false;
6923
6924 m_brcInputForEncKernelBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx];
6925
6926 // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
6927 // BRC init is called once even for CQP mode when ROI is enabled, hence also checking for first frame flag
6928 if ((m_brcEnabled && (m_brcInit || m_brcReset)) || (m_firstFrame && m_hevcPicParams->NumROI))
6929 {
6930 m_firstTaskInPhase = m_lastTaskInPhase = true;
6931 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcInitResetKernel());
6932 m_brcInit = m_brcReset = false;
6933 }
6934
6935 m_firstTaskInPhase = true;
6936 m_lastTaskInPhase = false;
6937
6938 CodechalEncodeSwScoreboard::KernelParams swScoreboardKernelParames;
6939 MOS_ZeroMemory(&swScoreboardKernelParames, sizeof(swScoreboardKernelParames));
6940
6941 InitSwScoreBoardParams(swScoreboardKernelParames);
6942
6943 if (m_useSwInitScoreboard)
6944 {
6945 SetupSwScoreBoard(&swScoreboardKernelParames);
6946 }
6947 else
6948 {
6949 // Call SW scoreboard Init kernel used by MBEnc kernel
6950 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->Execute(&swScoreboardKernelParames));
6951 }
6952
6953 // Dump SW scoreboard surface - Output of SW scoreboard Init Kernel and Input to MBENC
6954 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6955 m_swScoreboardState->GetCurSwScoreboardSurface(),
6956 CodechalDbgAttr::attrInput,
6957 "InitSWScoreboard_In",
6958 CODECHAL_MEDIA_STATE_SW_SCOREBOARD_INIT)));
6959
6960 // Csc, Downscaling, and/or 10-bit to 8-bit conversion
6961 CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState);
6962
6963 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
6964 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
6965 cscScalingKernelParams.bLastTaskInPhaseCSC =
6966 cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
6967 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
6968 cscScalingKernelParams.bLastTaskInPhase32xDS = !(m_hmeEnabled || m_brcEnabled);
6969
6970 CodechalEncodeCscDsG12::HevcExtKernelParams hevcExtCscParams;
6971 MOS_ZeroMemory(&hevcExtCscParams, sizeof(hevcExtCscParams));
6972
6973 if (m_isMaxLcu64)
6974 {
6975 hevcExtCscParams.bHevcEncHistorySum = true;
6976 hevcExtCscParams.bUseLCU32 = false;
6977 hevcExtCscParams.presHistoryBuffer = &m_encBCombinedBuffer2[m_lastRecycledBufIdx].sResource;
6978 hevcExtCscParams.dwSizeHistoryBuffer = m_historyOutBufferSize;
6979 hevcExtCscParams.dwOffsetHistoryBuffer = m_historyOutBufferOffset;
6980 hevcExtCscParams.presHistorySumBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6981 hevcExtCscParams.dwSizeHistorySumBuffer = sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer);
6982 hevcExtCscParams.dwOffsetHistorySumBuffer = sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer);
6983 hevcExtCscParams.presMultiThreadTaskBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6984 hevcExtCscParams.dwSizeMultiThreadTaskBuffer = m_threadTaskBufferSize;
6985 hevcExtCscParams.dwOffsetMultiThreadTaskBuffer = m_threadTaskBufferOffset;
6986 cscScalingKernelParams.hevcExtParams = &hevcExtCscParams;
6987 }
6988 else
6989 {
6990 cscScalingKernelParams.hevcExtParams = nullptr; // LCU32 does not require history buffers
6991 }
6992
6993 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
6994
6995 if (m_hmeEnabled)
6996 {
6997 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel());
6998 }
6999 else if (m_brcEnabled && m_hevcPicParams->CodingType == I_TYPE)
7000 {
7001 m_lastTaskInPhase = true;
7002 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeIntraDistKernel());
7003 }
7004
7005 // BRC + MbEnc in second task phase
7006 m_firstTaskInPhase = true;
7007 m_lastTaskInPhase = false;
7008
7009 // Wait for PAK if necessary
7010 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
7011
7012 // ROI uses the BRC LCU update kernel, even in CQP. So we will call it
7013 if (m_hevcPicParams->NumROI && !m_brcEnabled)
7014 {
7015 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcLcuUpdateKernel());
7016 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
7017
7018 CODECHAL_DEBUG_TOOL(
7019 if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
7020 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7021 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
7022 CodechalDbgAttr::attrOutput,
7023 "MbQp",
7024 m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
7025 m_brcBuffers.dwBrcMbQpBottomFieldOffset,
7026 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7027 } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcDistortion->OsResource,
7028 CodechalDbgAttr::attrInput,
7029 "BrcDist_AfterLcuBrc",
7030 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7031 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7032 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7033 }
7034
7035 if (m_brcEnabled)
7036 {
7037 m_hevcBrcG12->m_brcNumPakPasses = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses();
7038 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcFrameUpdateKernel());
7039
7040 CODECHAL_DEBUG_TOOL(
7041 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7042 &m_brcDistortion->OsResource,
7043 CodechalDbgAttr::attrInput,
7044 "BrcDist_AfterFrameBrc",
7045 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7046 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7047 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7048 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7049 &m_brcBuffers.resBrcHistoryBuffer,
7050 CodechalDbgAttr::attrOutput,
7051 "HistoryWrite",
7052 m_brcHistoryBufferSize,
7053 0,
7054 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7055 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7056 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
7057 CodechalDbgAttr::attrOutput,
7058 "ImgStateWrite",
7059 BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
7060 0,
7061 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7062
7063 CODECHAL_DEBUG_TOOL(
7064 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7065 &m_brcDistortion->OsResource,
7066 CodechalDbgAttr::attrInput,
7067 "BrcDist_AfterFrameBrcUpdate",
7068 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7069 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7070 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7071 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7072 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
7073 CodechalDbgAttr::attrOutput,
7074 "ImgStateWrite",
7075 BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
7076 0,
7077 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7078 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7079 &m_brcBuffers.resBrcHistoryBuffer,
7080 CodechalDbgAttr::attrOutput,
7081 "HistoryWrite",
7082 m_brcHistoryBufferSize,
7083 0,
7084 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7085 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7086 &m_brcBuffers.sBrcIntraDistortionBuffer.OsResource,
7087 CodechalDbgAttr::attrOutput,
7088 "Idistortion",
7089 m_brcBuffers.sBrcIntraDistortionBuffer.dwWidth * m_brcBuffers.sBrcIntraDistortionBuffer.dwHeight,
7090 0,
7091 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7092
7093 if (m_lcuBrcEnabled || m_hevcPicParams->NumROI)
7094 {
7095 // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
7096 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcLcuUpdateKernel());
7097 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
7098 }
7099 else
7100 {
7101 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
7102 }
7103
7104 CODECHAL_DEBUG_TOOL(
7105 if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
7106 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
7107 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
7108 CodechalDbgAttr::attrOutput,
7109 "MbQp",
7110 m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
7111 m_brcBuffers.dwBrcMbQpBottomFieldOffset,
7112 CODECHAL_MEDIA_STATE_BRC_UPDATE));
7113 } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcDistortion->OsResource,
7114 CodechalDbgAttr::attrInput,
7115 "BrcDist_AfterLcuBrcUpdate",
7116 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
7117 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
7118 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
7119 }
7120
7121 m_useWeightedSurfaceForL0 = false;
7122 m_useWeightedSurfaceForL1 = false;
7123
7124 //currently only support same weightoffset for all slices, and only support Luma weighted prediction
7125 auto slicetype = m_hevcSliceParams->slice_type;
7126 if (m_weightedPredictionSupported && !m_feiEnable &&
7127 ((slicetype == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
7128 (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)))
7129 {
7130 uint32_t LumaWeightFlag[2] = {0}; //[L0, L1]
7131 CodechalEncodeWP::SliceParams sliceWPParams;
7132 MOS_FillMemory((void *)&sliceWPParams, sizeof(sliceWPParams), 0);
7133
7134 //populate the slice WP parameter structure
7135 sliceWPParams.luma_log2_weight_denom = m_hevcSliceParams->luma_log2_weight_denom; // luma weidht denom
7136 for (auto i = 0; i < 2; i++)
7137 {
7138 for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
7139 {
7140 sliceWPParams.weights[i][j][0][0] = (1 << m_hevcSliceParams->luma_log2_weight_denom) +
7141 m_hevcSliceParams->delta_luma_weight[i][j]; //Luma weight
7142 sliceWPParams.weights[i][j][0][1] = m_hevcSliceParams->luma_offset[i][j]; //Luma offset
7143
7144 if (m_hevcSliceParams->delta_luma_weight[i][j] || m_hevcSliceParams->luma_offset[i][j])
7145 {
7146 LumaWeightFlag[i] |= (1 << j);
7147 }
7148 }
7149 }
7150
7151 CodechalEncodeWP::KernelParams wpKernelParams;
7152 MOS_FillMemory((void *)&wpKernelParams, sizeof(wpKernelParams), 0);
7153 wpKernelParams.useWeightedSurfaceForL0 = &m_useWeightedSurfaceForL0;
7154 wpKernelParams.useWeightedSurfaceForL1 = &m_useWeightedSurfaceForL1;
7155 wpKernelParams.slcWPParams = &sliceWPParams;
7156
7157 // Weighted Prediction to be applied for L0
7158 for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1); i++)
7159 {
7160 if ((LumaWeightFlag[LIST_0] & (1 << i)) && (i < CODEC_MAX_FORWARD_WP_FRAME))
7161 {
7162 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][i];
7163 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
7164 {
7165 MOS_SURFACE refFrameInput;
7166 uint8_t frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
7167 refFrameInput = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
7168
7169 //Weighted Prediction for ith forward reference frame
7170 wpKernelParams.useRefPicList1 = false;
7171 wpKernelParams.wpIndex = i;
7172 wpKernelParams.refFrameInput = &refFrameInput;
7173 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
7174 }
7175 }
7176 }
7177
7178 // Weighted Predition to be applied for L1
7179 if (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)
7180 {
7181 for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1); i++)
7182 {
7183 if ((LumaWeightFlag[LIST_1] & (1 << i)) && (i < CODEC_MAX_BACKWARD_WP_FRAME))
7184 {
7185 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][i];
7186 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
7187 {
7188 MOS_SURFACE refFrameInput;
7189 uint8_t frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
7190 refFrameInput = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
7191
7192 //Weighted Prediction for ith backward reference frame
7193 wpKernelParams.useRefPicList1 = true;
7194 wpKernelParams.wpIndex = i;
7195 wpKernelParams.refFrameInput = &refFrameInput;
7196 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
7197 }
7198 }
7199 }
7200 }
7201 }
7202
7203 // Reset to use a different performance tag ID
7204 m_osInterface->pfnResetPerfBufferID(m_osInterface);
7205
7206 m_lastTaskInPhase = true;
7207
7208 if (m_hevcPicParams->CodingType == I_TYPE)
7209 {
7210 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
7211 }
7212 else
7213 {
7214 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
7215 }
7216
7217 if (m_brcEnabled && m_enableFramePanicMode && (false == m_hevcSeqParams->DisableHRDConformance) &&
7218 m_skipFrameInfo.numSlices != m_numSlices) // 'numSlices != m_numSlices' check is to re-generate surface if slice layout changed from previous frame
7219 {
7220 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSkipFrameMbCodeSurface(m_skipFrameInfo));
7221 }
7222
7223 // Notify PAK engine once ENC is done
7224 if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
7225 {
7226 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
7227 if (m_useMdf)
7228 {
7229 if (!m_computeContextEnabled)
7230 {
7231 syncParams.GpuContext = MOS_GPU_CONTEXT_RENDER3; //MDF uses render3
7232 }
7233 else
7234 {
7235 syncParams.GpuContext = MOS_GPU_CONTEXT_CM_COMPUTE;
7236 }
7237 }
7238 else
7239 {
7240 syncParams.GpuContext = m_renderContext;
7241 }
7242 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
7243
7244 uint32_t old_stream_index = m_osInterface->streamIndex;
7245 m_osInterface->streamIndex = static_cast<CmQueueRT *>(m_cmQueue)->StreamIndex();
7246 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7247 m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
7248 m_osInterface->streamIndex = old_stream_index;
7249 }
7250
7251 if (m_brcEnabled)
7252 {
7253 if (m_hevcSeqParams->ParallelBRC)
7254 {
7255 m_brcBuffers.uiCurrBrcPakStasIdxForRead =
7256 (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
7257 }
7258 }
7259
7260 CODECHAL_DEBUG_TOOL(
7261 uint8_t index;
7262 CODEC_PICTURE refPic;
7263 if (m_useWeightedSurfaceForL0) {
7264 refPic = m_hevcSliceParams->RefPicList[LIST_0][0];
7265 index = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7266
7267 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7268 &m_refList[index]->sRefBuffer,
7269 CodechalDbgAttr::attrReferenceSurfaces,
7270 "WP_In_L0")));
7271
7272 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7273 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + 0),
7274 CodechalDbgAttr::attrReferenceSurfaces,
7275 "WP_Out_L0")));
7276 } if (m_useWeightedSurfaceForL1) {
7277 refPic = m_hevcSliceParams->RefPicList[LIST_1][0];
7278 index = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
7279
7280 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7281 &m_refList[index]->sRefBuffer,
7282 CodechalDbgAttr::attrReferenceSurfaces,
7283 "WP_In_L1")));
7284
7285 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
7286 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + 0),
7287 CodechalDbgAttr::attrReferenceSurfaces,
7288 "WP_Out_L1")));
7289 })
7290
7291 m_lastPictureCodingType = m_pictureCodingType;
7292 m_lastRecycledBufIdx = m_currRecycledBufIdx;
7293
7294 return eStatus;
7295 }
7296
EncodeIntraDistKernel()7297 MOS_STATUS CodechalEncHevcStateG12::EncodeIntraDistKernel()
7298 {
7299 CodechalKernelIntraDist::CurbeParam curbeParam;
7300 curbeParam.downScaledWidthInMb4x = m_downscaledWidthInMb4x;
7301 curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x;
7302
7303 CodechalKernelIntraDist::SurfaceParams surfaceParam;
7304 surfaceParam.input4xDsSurface =
7305 surfaceParam.input4xDsVmeSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
7306 surfaceParam.intraDistSurface = m_brcDistortion;
7307 surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset;
7308 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam));
7309
7310 return MOS_STATUS_SUCCESS;
7311 }
7312
InitKernelState()7313 MOS_STATUS CodechalEncHevcStateG12::InitKernelState()
7314 {
7315 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7316
7317 CODECHAL_ENCODE_FUNCTION_ENTER;
7318
7319 // Init kernel state
7320 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
7321 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
7322
7323 // Create weighted prediction kernel state
7324 CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState = MOS_New(CodechalEncodeWPG12, this));
7325 m_wpState->SetKernelBase(m_kernelBase);
7326 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->InitKernelState());
7327 // create intra distortion kernel
7328 m_intraDistKernel = MOS_New(CodechalKernelIntraDist, this);
7329 CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel);
7330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Initialize(
7331 GetCommonKernelHeaderAndSizeG12,
7332 m_kernelBase,
7333 m_kuidCommon));
7334
7335 // Create SW scoreboard init kernel state
7336 CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardG12, this));
7337 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState());
7338 // Create Hme kernel
7339 m_hmeKernel = MOS_New(CodechalKernelHmeG12, this);
7340 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
7341 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
7342 GetCommonKernelHeaderAndSizeG12,
7343 m_kernelBase,
7344 m_kuidCommon));
7345
7346 return eStatus;
7347 }
7348
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)7349 MOS_STATUS CodechalEncHevcStateG12::SetDmemHuCPakIntegrate(
7350 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
7351 {
7352 CODECHAL_ENCODE_FUNCTION_ENTER;
7353
7354 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7355
7356 MOS_LOCK_PARAMS lockFlagsWriteOnly;
7357 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7358 lockFlagsWriteOnly.WriteOnly = true;
7359
7360 int32_t currentPass = GetCurrentPass();
7361 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES || !m_brcEnabled)
7362 {
7363 eStatus = MOS_STATUS_INVALID_PARAMETER;
7364 return eStatus;
7365 }
7366
7367 HucPakStitchDmemEncG12 *hucPakStitchDmem = (HucPakStitchDmemEncG12 *)m_osInterface->pfnLockResource(
7368 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
7369 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
7370
7371 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG12));
7372
7373 // reset all the offsets to -1
7374 uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
7375 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
7376 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
7377 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
7378 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
7379 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
7380 MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
7381
7382 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7383 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7384 CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
7385 CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4
7386 uint16_t numTiles = numTileRows * numTileColumns;
7387 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
7388
7389 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
7390 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
7391 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
7392 hucPakStitchDmem->Codec = 1; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
7393 hucPakStitchDmem->MAXPass = m_brcEnabled ? (m_numPassesInOnePipe + 1) : 1;
7394 hucPakStitchDmem->CurrentPass = (uint8_t)currentPass + 1; // // Current BRC pass [1..MAXPass]
7395 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
7396 hucPakStitchDmem->CabacZeroWordFlag = true; // to do: set to true later
7397 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
7398 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
7399 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
7400 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
7401 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
7402 hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
7403 hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
7404
7405 hucPakStitchDmem->StitchEnable = false;
7406 hucPakStitchDmem->StitchCommandOffset = 0;
7407 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
7408 hucPakStitchDmem->brcUnderFlowEnable = false; //temporally disable underflow bit rate control in HUC fw since it need more tuning.
7409
7410 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
7411 CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
7412 uint32_t totalSliceHeaderSize = 0;
7413 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
7414 {
7415 totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
7416 slcData++;
7417 }
7418 hucPakStitchDmem->SliceHeaderSizeinBits = totalSliceHeaderSize * 8;
7419 hucPakStitchDmem->currFrameBRClevel = m_currFrameBrcLevel;
7420
7421 //Set the kernel output offsets
7422 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
7423 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = m_hevcFrameStatsOffset.uiHevcPakStatistics;
7424 hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
7425 hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF;
7426
7427 for (auto i = 0; i < m_numPipe; i++)
7428 {
7429 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
7430
7431 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
7432 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
7433 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
7434 m_hevcTileStatsOffset.uiTileSizeRecord;
7435 hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) +
7436 m_hevcTileStatsOffset.uiHevcPakStatistics;
7437 }
7438
7439 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
7440
7441 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
7442 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
7443 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE);
7444 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
7445
7446 return eStatus;
7447 }
7448
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)7449 MOS_STATUS CodechalEncHevcStateG12::SetRegionsHuCPakIntegrate(
7450 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
7451 {
7452 CODECHAL_ENCODE_FUNCTION_ENTER;
7453
7454 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7455
7456 int32_t currentPass = GetCurrentPass();
7457 if (currentPass < 0 ||
7458 (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) ||
7459 (currentPass != 0 && m_cqpEnabled))
7460 {
7461 eStatus = MOS_STATUS_INVALID_PARAMETER;
7462 return eStatus;
7463 }
7464
7465 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
7466 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
7467 // Add Virtual addr
7468 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
7469 virtualAddrParams->regionParams[0].dwOffset = 0;
7470 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
7471 virtualAddrParams->regionParams[1].isWritable = true;
7472 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
7473 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
7474 virtualAddrParams->regionParams[5].isWritable = true;
7475 virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
7476 virtualAddrParams->regionParams[6].isWritable = true;
7477 virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command
7478 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
7479 virtualAddrParams->regionParams[9].isWritable = true;
7480 virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation
7481 virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
7482 virtualAddrParams->regionParams[10].isWritable = true;
7483 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
7484 virtualAddrParams->regionParams[15].dwOffset = 0;
7485
7486 return eStatus;
7487 }
7488
SetDmemHuCPakIntegrateCqp(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)7489 MOS_STATUS CodechalEncHevcStateG12::SetDmemHuCPakIntegrateCqp(
7490 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
7491 {
7492 CODECHAL_ENCODE_FUNCTION_ENTER;
7493
7494 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7495
7496 MOS_LOCK_PARAMS lockFlagsWriteOnly;
7497 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7498 lockFlagsWriteOnly.WriteOnly = true;
7499
7500 int32_t currentPass = GetCurrentPass();
7501 if (currentPass != 0 || (!m_cqpEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ))
7502 {
7503 eStatus = MOS_STATUS_INVALID_PARAMETER;
7504 return eStatus;
7505 }
7506
7507 HucPakStitchDmemEncG12 *hucPakStitchDmem = (HucPakStitchDmemEncG12 *)m_osInterface->pfnLockResource(
7508 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
7509 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
7510
7511 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG12));
7512
7513 // reset all the offsets to -1
7514 uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
7515 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
7516 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
7517 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
7518 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
7519 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
7520 MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
7521
7522 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7523 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7524 CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
7525 CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4
7526 uint16_t numTiles = numTileRows * numTileColumns;
7527 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
7528
7529 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
7530 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
7531 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
7532 hucPakStitchDmem->Codec = 2; //HEVC DP CQP
7533 hucPakStitchDmem->MAXPass = 1;
7534 hucPakStitchDmem->CurrentPass = 1;
7535 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
7536 hucPakStitchDmem->CabacZeroWordFlag = true;
7537 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
7538 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
7539 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
7540 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
7541 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
7542 hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
7543 hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
7544
7545 hucPakStitchDmem->StitchEnable = false;
7546 hucPakStitchDmem->StitchCommandOffset = 0;
7547 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
7548
7549 //Set the kernel output offsets
7550 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
7551 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = 0xFFFFFFFF;
7552 hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
7553 hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF;
7554
7555 for (auto i = 0; i < m_numPipe; i++)
7556 {
7557 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
7558
7559 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
7560 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
7561 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
7562 m_hevcTileStatsOffset.uiTileSizeRecord;
7563 }
7564
7565 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
7566
7567 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
7568 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
7569 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE);
7570 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
7571
7572 return eStatus;
7573 }
7574
ConfigStitchDataBuffer()7575 MOS_STATUS CodechalEncHevcStateG12::ConfigStitchDataBuffer()
7576 {
7577 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7578 CODECHAL_ENCODE_FUNCTION_ENTER;
7579 int32_t currentPass = GetCurrentPass();
7580 if (currentPass < 0 ||
7581 (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) ||
7582 (currentPass != 0 && m_cqpEnabled))
7583 {
7584 eStatus = MOS_STATUS_INVALID_PARAMETER;
7585 return eStatus;
7586 }
7587
7588 MOS_LOCK_PARAMS lockFlagsWriteOnly;
7589 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7590 lockFlagsWriteOnly.WriteOnly = 1;
7591
7592 HucCommandData *hucStitchDataBuf = (HucCommandData *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
7593 CODECHAL_ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
7594
7595 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
7596 hucStitchDataBuf->TotalCommands = 1;
7597 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
7598
7599 HucInputCmdG12 hucInputCmd;
7600 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
7601
7602 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
7603 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
7604 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
7605 hucInputCmd.LengthOfTable = (uint8_t)(m_numTiles);
7606 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;
7607 ;
7608
7609 PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;
7610
7611 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
7612 m_osInterface,
7613 presSrc,
7614 false,
7615 false));
7616 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
7617 m_osInterface,
7618 &m_resBitstreamBuffer,
7619 true,
7620 true));
7621
7622 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
7623 uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
7624 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
7625 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
7626
7627 hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
7628 hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
7629
7630 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
7631
7632 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
7633
7634 return eStatus;
7635 }
7636
SetRegionsHuCPakIntegrateCqp(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)7637 MOS_STATUS CodechalEncHevcStateG12::SetRegionsHuCPakIntegrateCqp(
7638 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
7639 {
7640 CODECHAL_ENCODE_FUNCTION_ENTER;
7641
7642 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7643
7644 int32_t currentPass = GetCurrentPass();
7645 if (currentPass < 0 ||
7646 (m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ && m_brcEnabled) ||
7647 (currentPass != 0 && m_cqpEnabled))
7648 {
7649 eStatus = MOS_STATUS_INVALID_PARAMETER;
7650 return eStatus;
7651 }
7652 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
7653
7654 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
7655
7656 // Add Virtual addr
7657 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
7658 virtualAddrParams->regionParams[0].dwOffset = 0;
7659 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
7660 virtualAddrParams->regionParams[1].isWritable = true;
7661 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
7662 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
7663 virtualAddrParams->regionParams[5].isWritable = true;
7664 virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
7665 virtualAddrParams->regionParams[6].isWritable = true;
7666 virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command
7667
7668 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
7669 virtualAddrParams->regionParams[9].isWritable = true;
7670 virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation
7671 virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
7672 virtualAddrParams->regionParams[10].isWritable = true;
7673 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
7674 virtualAddrParams->regionParams[15].dwOffset = 0;
7675
7676 return eStatus;
7677 }
7678
7679 #if (_DEBUG || _RELEASE_INTERNAL)
ResetImgCtrlRegInPAKStatisticsBuffer(PMOS_COMMAND_BUFFER cmdBuffer)7680 MOS_STATUS CodechalEncHevcStateG12::ResetImgCtrlRegInPAKStatisticsBuffer(
7681 PMOS_COMMAND_BUFFER cmdBuffer)
7682 {
7683 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7684
7685 CODECHAL_ENCODE_FUNCTION_ENTER;
7686
7687 MHW_MI_STORE_DATA_PARAMS storeDataParams;
7688 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7689 storeDataParams.pOsResource = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7690 storeDataParams.dwResourceOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
7691 storeDataParams.dwValue = 0;
7692 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
7693 cmdBuffer,
7694 &storeDataParams));
7695
7696 return eStatus;
7697 }
7698 #endif
7699
ReadBrcPakStatisticsForScalability(PMOS_COMMAND_BUFFER cmdBuffer)7700 MOS_STATUS CodechalEncHevcStateG12::ReadBrcPakStatisticsForScalability(
7701 PMOS_COMMAND_BUFFER cmdBuffer)
7702 {
7703 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7704
7705 CODECHAL_ENCODE_FUNCTION_ENTER;
7706
7707 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7708
7709 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
7710 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
7711 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
7712 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCount);
7713 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7714 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME);
7715 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
7716
7717 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
7718 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
7719 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCountNoHeader);
7720 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7721 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER);
7722 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
7723
7724 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
7725 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
7726 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
7727 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
7728 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
7729 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
7730
7731 uint32_t dwOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
7732 m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset
7733 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
7734
7735 MHW_MI_STORE_DATA_PARAMS storeDataParams;
7736 storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
7737 storeDataParams.dwResourceOffset = dwOffset;
7738 storeDataParams.dwValue = (uint8_t)GetCurrentPass();
7739 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
7740
7741 return eStatus;
7742 }
7743
DumpHucDebugOutputBuffers()7744 MOS_STATUS CodechalEncHevcStateG12::DumpHucDebugOutputBuffers()
7745 {
7746 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7747
7748 //only dump HuC in/out buffers in brc scalability case
7749 bool dumpDebugBuffers = IsLastPipe() && (m_numPipe >= 2) && m_brcEnabled;
7750 if (m_singleTaskPhaseSupported)
7751 {
7752 dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
7753 }
7754
7755 if (dumpDebugBuffers)
7756 {
7757 CODECHAL_DEBUG_TOOL(
7758 int32_t currentPass = GetCurrentPass();
7759 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
7760 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
7761 sizeof(HucPakStitchDmemEncG12),
7762 currentPass,
7763 hucRegionDumpPakIntegrate));
7764
7765 // Region 7 - HEVC PIC State Command
7766 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7767 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
7768 0,
7769 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
7770 7,
7771 "_PicState",
7772 true,
7773 currentPass,
7774 hucRegionDumpPakIntegrate));
7775
7776 // Region 5 - Last Tile PAK Bitstream Output
7777 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7778 &m_resBitstreamBuffer,
7779 0,
7780 m_encodeParams.dwBitstreamSize,
7781 5,
7782 "_Bitstream",
7783 false,
7784 currentPass,
7785 hucRegionDumpPakIntegrate));
7786
7787 // Region 6 - BRC History buffer
7788 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7789 &m_brcBuffers.resBrcHistoryBuffer,
7790 0,
7791 m_brcHistoryBufferSize,
7792 6,
7793 "_HistoryBuffer",
7794 false,
7795 currentPass,
7796 hucRegionDumpPakIntegrate));
7797 // Region 9 - HCP BRC Data Output
7798 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7799 &m_resBrcDataBuffer,
7800 0,
7801 CODECHAL_CACHELINE_SIZE,
7802 9,
7803 "_HcpBrcData",
7804 false,
7805 currentPass,
7806 hucRegionDumpPakIntegrate));
7807 // Region 1 - Output Aggregated Frame Level Statistics
7808 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7809 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
7810 0,
7811 m_hwInterface->m_pakIntAggregatedFrameStatsSize, // program exact out size
7812 1,
7813 "_AggregateFrameStats",
7814 false,
7815 currentPass,
7816 hucRegionDumpPakIntegrate));
7817 // Region 0 - Tile Statistics Constant Buffer
7818 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7819 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
7820 0,
7821 m_hwInterface->m_pakIntTileStatsSize,
7822 0,
7823 "_TileBasedStats",
7824 true,
7825 currentPass,
7826 hucRegionDumpPakIntegrate));
7827 // Region 15 - Tile Record Buffer
7828 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
7829 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
7830 0,
7831 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
7832 15,
7833 "_TileRecord",
7834 false,
7835 currentPass,
7836 hucRegionDumpPakIntegrate));)
7837 }
7838
7839 return eStatus;
7840 }
7841
CodechalEncHevcStateG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)7842 CodechalEncHevcStateG12::CodechalEncHevcStateG12(
7843 CodechalHwInterface * hwInterface,
7844 CodechalDebugInterface *debugInterface,
7845 PCODECHAL_STANDARD_INFO standardInfo)
7846 : CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
7847 {
7848 m_2xMeSupported =
7849 m_useCommonKernel = true;
7850 m_useHwScoreboard = false;
7851 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
7852 m_kernelBase = (uint8_t *)IGCODECKRN_G12;
7853 #else
7854 m_kernelBase = nullptr;
7855 #endif
7856 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
7857 m_hucPakStitchEnabled = true;
7858 m_scalabilityState = nullptr;
7859
7860 MOS_ZeroMemory(&m_currPicWithReconBoundaryPix, sizeof(m_currPicWithReconBoundaryPix));
7861 MOS_ZeroMemory(&m_lcuLevelInputDataSurface, sizeof(m_lcuLevelInputDataSurface));
7862 MOS_ZeroMemory(&m_encoderHistoryInputBuffer, sizeof(m_encoderHistoryInputBuffer));
7863 MOS_ZeroMemory(&m_encoderHistoryOutputBuffer, sizeof(m_encoderHistoryOutputBuffer));
7864 MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu32, sizeof(m_intermediateCuRecordSurfaceLcu32));
7865 MOS_ZeroMemory(&m_scratchSurface, sizeof(m_scratchSurface));
7866 MOS_ZeroMemory(&m_16x16QpInputData, sizeof(m_16x16QpInputData));
7867 MOS_ZeroMemory(m_debugSurface, sizeof(m_debugSurface));
7868 MOS_ZeroMemory(&m_encConstantTableForB, sizeof(m_encConstantTableForB));
7869 MOS_ZeroMemory(&m_mvAndDistortionSumSurface, sizeof(m_mvAndDistortionSumSurface));
7870 MOS_ZeroMemory(m_encBCombinedBuffer1, sizeof(m_encBCombinedBuffer1));
7871 MOS_ZeroMemory(m_encBCombinedBuffer2, sizeof(m_encBCombinedBuffer2));
7872
7873 MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
7874 MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
7875 MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
7876 MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
7877 MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
7878 MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
7879 MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
7880
7881 MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
7882 MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
7883 MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
7884 MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
7885 MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
7886 MOS_ZeroMemory(&m_resPipeCompleteSemaMem, sizeof(m_resPipeCompleteSemaMem));
7887 MOS_ZeroMemory(m_resHucPakStitchDmemBuffer, sizeof(m_resHucPakStitchDmemBuffer));
7888 MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
7889 MOS_ZeroMemory(&m_skipFrameInfo.m_resMbCodeSkipFrameSurface, sizeof(m_skipFrameInfo.m_resMbCodeSkipFrameSurface));
7890
7891 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
7892 m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
7893 m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
7894
7895 m_kuid = IDR_CODEC_HEVC_COMBINED_KENREL_INTEL;
7896 MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
7897 m_kernelBase,
7898 m_kuid,
7899 &m_kernelBinary,
7900 &m_combinedKernelSize);
7901 CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
7902
7903 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
7904 MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
7905
7906 m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
7907
7908 Mos_SetVirtualEngineSupported(m_osInterface, true);
7909 }
7910
~CodechalEncHevcStateG12()7911 CodechalEncHevcStateG12::~CodechalEncHevcStateG12()
7912 {
7913 CODECHAL_ENCODE_FUNCTION_ENTER;
7914
7915 if (m_wpState)
7916 {
7917 MOS_Delete(m_wpState);
7918 m_wpState = nullptr;
7919 }
7920
7921 if (m_intraDistKernel)
7922 {
7923 MOS_Delete(m_intraDistKernel);
7924 m_intraDistKernel = nullptr;
7925 }
7926
7927 if (m_swScoreboardState)
7928 {
7929 MOS_Delete(m_swScoreboardState);
7930 m_swScoreboardState = nullptr;
7931 }
7932
7933 if (m_scalabilityState)
7934 {
7935 MOS_FreeMemAndSetNull(m_scalabilityState);
7936 }
7937
7938 #if (_DEBUG || _RELEASE_INTERNAL)
7939 if (m_statusReportDebugInterface != nullptr)
7940 {
7941 MOS_Delete(m_statusReportDebugInterface);
7942 m_statusReportDebugInterface = nullptr;
7943 }
7944 #endif
7945 }
7946
Allocate(CodechalSetting * codecHalSettings)7947 MOS_STATUS CodechalEncHevcStateG12::Allocate(CodechalSetting *codecHalSettings)
7948 {
7949 #if (_DEBUG || _RELEASE_INTERNAL)
7950 if (!m_statusReportDebugInterface)
7951 {
7952 m_statusReportDebugInterface = MOS_New(CodechalDebugInterface);
7953 CODECHAL_ENCODE_CHK_NULL_RETURN(m_statusReportDebugInterface);
7954 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7955 m_statusReportDebugInterface->Initialize(m_hwInterface, codecHalSettings->codecFunction));
7956 }
7957 #endif
7958
7959 return CodechalEncoderState::Allocate(codecHalSettings);
7960 }
7961
CodecHalHevc_GetFileSize(char * fileName)7962 uint32_t CodechalEncHevcStateG12::CodecHalHevc_GetFileSize(char *fileName)
7963 {
7964 FILE * fp = nullptr;
7965 uint32_t fileSize = 0;
7966 MosUtilities::MosSecureFileOpen(&fp, fileName, "rb");
7967 if (fp == nullptr)
7968 {
7969 return 0;
7970 }
7971 fseek(fp, 0, SEEK_END);
7972 fileSize = ftell(fp);
7973 fseek(fp, 0, SEEK_SET);
7974 fclose(fp);
7975
7976 return fileSize;
7977 }
7978
LoadSourceAndRef2xDSFromFile(PMOS_SURFACE pRef2xSurface,PMOS_SURFACE pSrc2xSurface,uint8_t reflist,uint8_t refIdx)7979 MOS_STATUS CodechalEncHevcStateG12::LoadSourceAndRef2xDSFromFile(
7980 PMOS_SURFACE pRef2xSurface,
7981 PMOS_SURFACE pSrc2xSurface,
7982 uint8_t reflist,
7983 uint8_t refIdx)
7984 {
7985 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7986
7987 CODECHAL_ENCODE_FUNCTION_ENTER;
7988
7989 if (m_loadKernelInput == false || (pSrc2xSurface && Mos_ResourceIsNull(&pSrc2xSurface->OsResource)) ||
7990 (pRef2xSurface && Mos_ResourceIsNull(&pRef2xSurface->OsResource)) ||
7991 (pSrc2xSurface == NULL && pRef2xSurface == NULL))
7992 {
7993 return eStatus;
7994 }
7995
7996 char pathOfRef2xDSCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
7997 MOS_SecureStringPrint(pathOfRef2xDSCmd,
7998 sizeof(pathOfRef2xDSCmd),
7999 sizeof(pathOfRef2xDSCmd),
8000 "%s\\Ref2xDSL%1d%1d.dat.%d",
8001 m_loadKernelInputDataFolder,
8002 reflist,
8003 refIdx,
8004 m_frameNum);
8005 char pathOfSrc2xDSCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
8006 MOS_SecureStringPrint(pathOfSrc2xDSCmd,
8007 sizeof(pathOfSrc2xDSCmd),
8008 sizeof(pathOfSrc2xDSCmd),
8009 "%s\\Src2xDS.dat.%d",
8010 m_loadKernelInputDataFolder,
8011 m_frameNum);
8012
8013 uint32_t sizeRef2xDS = CodecHalHevc_GetFileSize(pathOfRef2xDSCmd);
8014 uint32_t sizeSrc2xDS = CodecHalHevc_GetFileSize(pathOfSrc2xDSCmd);
8015 if (sizeRef2xDS == 0 && sizeSrc2xDS == 0)
8016 return MOS_STATUS_SUCCESS;
8017 MOS_LOCK_PARAMS lockFlags;
8018
8019 if (pRef2xSurface && sizeRef2xDS)
8020 {
8021 if (sizeRef2xDS > (pRef2xSurface->dwPitch * pRef2xSurface->dwHeight * 3 / 2))
8022 {
8023 return MOS_STATUS_INVALID_FILE_SIZE;
8024 }
8025 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8026 lockFlags.WriteOnly = 1;
8027 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8028 m_osInterface, &pRef2xSurface->OsResource, &lockFlags);
8029 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8030
8031 FILE *Ref2xDS = nullptr;
8032 eStatus = MosUtilities::MosSecureFileOpen(&Ref2xDS, pathOfRef2xDSCmd, "rb");
8033 if (Ref2xDS == nullptr)
8034 {
8035 m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource);
8036 return eStatus;
8037 }
8038
8039 uint32_t sizeToRead = sizeRef2xDS * 2 / 3;
8040 if (sizeToRead != fread((void *)data, 1, sizeToRead, Ref2xDS))
8041 {
8042 fclose(Ref2xDS);
8043 m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource);
8044 return MOS_STATUS_INVALID_FILE_SIZE;
8045 }
8046 fclose(Ref2xDS);
8047 //MOS_ZeroMemory(data + sizeToRead, sizeRef2xDS-sizeToRead);
8048
8049 m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource);
8050 }
8051
8052 if (pSrc2xSurface && sizeSrc2xDS)
8053 {
8054 if (sizeSrc2xDS > (pSrc2xSurface->dwPitch * pSrc2xSurface->dwHeight * 3 / 2))
8055 {
8056 return MOS_STATUS_INVALID_FILE_SIZE;
8057 }
8058
8059 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8060 lockFlags.WriteOnly = 1;
8061 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8062 m_osInterface, &pSrc2xSurface->OsResource, &lockFlags);
8063 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8064
8065 FILE *Src2xDS = nullptr;
8066 eStatus = MosUtilities::MosSecureFileOpen(&Src2xDS, pathOfSrc2xDSCmd, "rb");
8067 if (Src2xDS == nullptr)
8068 {
8069 m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource);
8070 return eStatus;
8071 }
8072
8073 uint32_t sizeToRead = sizeSrc2xDS * 2 / 3;
8074 if (sizeToRead != fread((void *)data, 1, sizeToRead, Src2xDS))
8075 {
8076 fclose(Src2xDS);
8077 m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource);
8078 return MOS_STATUS_INVALID_FILE_SIZE;
8079 }
8080 fclose(Src2xDS);
8081 //MOS_ZeroMemory(data + sizeToRead, sizeRef2xDS-sizeToRead);
8082
8083 m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource);
8084 }
8085
8086 return eStatus;
8087 }
8088
LoadPakCommandAndCuRecordFromFile()8089 MOS_STATUS CodechalEncHevcStateG12::LoadPakCommandAndCuRecordFromFile()
8090 {
8091 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8092
8093 CODECHAL_ENCODE_FUNCTION_ENTER;
8094
8095 char pathOfPakCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
8096 MOS_SecureStringPrint(pathOfPakCmd,
8097 sizeof(pathOfPakCmd),
8098 sizeof(pathOfPakCmd),
8099 "%s\\PAKObj.dat.%d",
8100 m_pakOnlyDataFolder,
8101 m_frameNum);
8102
8103 char pathOfCuRecord[MOS_USER_CONTROL_MAX_DATA_SIZE];
8104 MOS_SecureStringPrint(pathOfCuRecord,
8105 sizeof(pathOfCuRecord),
8106 sizeof(pathOfCuRecord),
8107 "%s\\CURecord.dat.%d",
8108 m_pakOnlyDataFolder,
8109 m_frameNum);
8110
8111 uint32_t sizePakObj = CodecHalHevc_GetFileSize(pathOfPakCmd);
8112 if (sizePakObj == 0 || sizePakObj > m_mvOffset)
8113 {
8114 return MOS_STATUS_INVALID_FILE_SIZE;
8115 }
8116
8117 uint32_t sizeCuRecord = CodecHalHevc_GetFileSize(pathOfCuRecord);
8118 if (sizeCuRecord == 0 || sizeCuRecord > m_mbCodeSize - m_mvOffset)
8119 {
8120 return MOS_STATUS_INVALID_FILE_SIZE;
8121 }
8122
8123 MOS_LOCK_PARAMS lockFlags;
8124 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8125 lockFlags.WriteOnly = 1;
8126 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8127 m_osInterface, &m_resMbCodeSurface, &lockFlags);
8128 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8129
8130 FILE *pakObj = nullptr;
8131 eStatus = MosUtilities::MosSecureFileOpen(&pakObj, pathOfPakCmd, "rb");
8132 if (pakObj == nullptr)
8133 {
8134 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8135 return eStatus;
8136 }
8137
8138 uint8_t *pakCmd = data;
8139 if (sizePakObj != fread((void *)pakCmd, 1, sizePakObj, pakObj))
8140 {
8141 fclose(pakObj);
8142 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8143 return MOS_STATUS_INVALID_FILE_SIZE;
8144 }
8145 fclose(pakObj);
8146
8147 uint8_t *record = data + m_mvOffset;
8148 FILE * fRecord = nullptr;
8149 eStatus = MosUtilities::MosSecureFileOpen(&fRecord, pathOfCuRecord, "rb");
8150 if (fRecord == nullptr)
8151 {
8152 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8153 return eStatus;
8154 }
8155
8156 if (sizeCuRecord != fread((void *)record, 1, sizeCuRecord, fRecord))
8157 {
8158 fclose(fRecord);
8159 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8160 return MOS_STATUS_INVALID_FILE_SIZE;
8161 }
8162 fclose(fRecord);
8163
8164 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
8165
8166 if (m_brcEnabled)
8167 {
8168 //Image State
8169 char pathOfPicState[MOS_USER_CONTROL_MAX_DATA_SIZE];
8170 MOS_SecureStringPrint(pathOfPicState,
8171 sizeof(pathOfPicState),
8172 sizeof(pathOfPicState),
8173 "%s\\BrcUpdate_ImgStateWrite.dat.%d",
8174 m_pakOnlyDataFolder,
8175 m_frameNum);
8176
8177 int32_t tmpSizePicState = CodecHalHevc_GetFileSize(pathOfPicState);
8178 uint32_t sizePicState = 0;
8179 if (tmpSizePicState <= 0)
8180 {
8181 return MOS_STATUS_INVALID_FILE_SIZE;
8182 }
8183 else
8184 {
8185 sizePicState = static_cast<uint32_t>(tmpSizePicState);
8186 }
8187
8188 data = (uint8_t *)m_osInterface->pfnLockResource(
8189 m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], &lockFlags);
8190 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8191
8192 FILE *fPicState = nullptr;
8193 eStatus = MosUtilities::MosSecureFileOpen(&fPicState, pathOfPicState, "rb");
8194 if (fPicState == nullptr)
8195 {
8196 m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]);
8197 return eStatus;
8198 }
8199
8200 if (sizePicState != fread((void *)data, 1, sizePicState, fPicState))
8201 {
8202 fclose(fPicState);
8203 m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]);
8204 return MOS_STATUS_INVALID_FILE_SIZE;
8205 }
8206 fclose(fPicState);
8207 m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]);
8208 }
8209
8210 return eStatus;
8211 }
8212
PicCodingTypeToSliceType(uint16_t pictureCodingType)8213 uint8_t CodechalEncHevcStateG12::PicCodingTypeToSliceType(uint16_t pictureCodingType)
8214 {
8215 uint8_t sliceType = 0;
8216
8217 switch (pictureCodingType)
8218 {
8219 case I_TYPE:
8220 sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
8221 break;
8222 case P_TYPE:
8223 sliceType = CODECHAL_ENCODE_HEVC_P_SLICE;
8224 break;
8225 case B_TYPE:
8226 case B1_TYPE:
8227 case B2_TYPE:
8228 sliceType = CODECHAL_ENCODE_HEVC_B_SLICE;
8229 break;
8230 default:
8231 CODECHAL_ENCODE_ASSERT(false);
8232 }
8233 return sliceType;
8234 }
8235
8236 // The following code is from the kernel ULT
InitMediaObjectWalker(uint32_t threadSpaceWidth,uint32_t threadSpaceHeight,uint32_t colorCountMinusOne,DependencyPattern dependencyPattern,uint32_t childThreadNumber,uint32_t localLoopExecCount,MHW_WALKER_PARAMS & walkerParams)8237 MOS_STATUS CodechalEncHevcStateG12::InitMediaObjectWalker(
8238 uint32_t threadSpaceWidth,
8239 uint32_t threadSpaceHeight,
8240 uint32_t colorCountMinusOne,
8241 DependencyPattern dependencyPattern,
8242 uint32_t childThreadNumber,
8243 uint32_t localLoopExecCount,
8244 MHW_WALKER_PARAMS &walkerParams)
8245 {
8246 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8247
8248 walkerParams.ColorCountMinusOne = colorCountMinusOne;
8249 walkerParams.dwGlobalLoopExecCount = 0x3ff;
8250 walkerParams.dwLocalLoopExecCount = 0x3ff;
8251
8252 if (dependencyPattern == dependencyWavefrontHorizontal)
8253 {
8254 // Global
8255 walkerParams.GlobalResolution.x = threadSpaceWidth;
8256 walkerParams.GlobalResolution.y = threadSpaceHeight;
8257 walkerParams.GlobalStart.x = 0;
8258 walkerParams.GlobalStart.y = 0;
8259 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8260 walkerParams.GlobalOutlerLoopStride.y = 0;
8261 walkerParams.GlobalInnerLoopUnit.x = 0;
8262 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8263
8264 // Local
8265 walkerParams.BlockResolution.x = threadSpaceWidth;
8266 walkerParams.BlockResolution.y = threadSpaceHeight;
8267 walkerParams.LocalStart.x = 0;
8268 walkerParams.LocalStart.y = 0;
8269 walkerParams.LocalOutLoopStride.x = 1;
8270 walkerParams.LocalOutLoopStride.y = 0;
8271 walkerParams.LocalInnerLoopUnit.x = 0;
8272 walkerParams.LocalInnerLoopUnit.y = 1;
8273
8274 // Mid
8275 walkerParams.MiddleLoopExtraSteps = 0;
8276 walkerParams.MidLoopUnitX = 0;
8277 walkerParams.MidLoopUnitY = 0;
8278 }
8279 else if (dependencyPattern == dependencyWavefrontVertical)
8280 {
8281 // Global
8282 walkerParams.GlobalResolution.x = threadSpaceWidth;
8283 walkerParams.GlobalResolution.y = threadSpaceHeight;
8284 walkerParams.GlobalStart.x = 0;
8285 walkerParams.GlobalStart.y = 0;
8286 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8287 walkerParams.GlobalOutlerLoopStride.y = 0;
8288 walkerParams.GlobalInnerLoopUnit.x = 0;
8289 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8290
8291 // Local
8292 walkerParams.BlockResolution.x = threadSpaceWidth;
8293 walkerParams.BlockResolution.y = threadSpaceHeight;
8294 walkerParams.LocalStart.x = 0;
8295 walkerParams.LocalStart.y = 0;
8296 walkerParams.LocalOutLoopStride.x = 0;
8297 walkerParams.LocalOutLoopStride.y = 1;
8298 walkerParams.LocalInnerLoopUnit.x = 1;
8299 walkerParams.LocalInnerLoopUnit.y = 0;
8300
8301 // Mid
8302 walkerParams.MiddleLoopExtraSteps = 0;
8303 walkerParams.MidLoopUnitX = 0;
8304 walkerParams.MidLoopUnitY = 0;
8305 }
8306 else if (dependencyPattern == dependencyWavefront45Degree)
8307 {
8308 // Global
8309 walkerParams.GlobalResolution.x = threadSpaceWidth;
8310 walkerParams.GlobalResolution.y = threadSpaceHeight;
8311 walkerParams.GlobalStart.x = 0;
8312 walkerParams.GlobalStart.y = 0;
8313 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8314 walkerParams.GlobalOutlerLoopStride.y = 0;
8315 walkerParams.GlobalInnerLoopUnit.x = 0;
8316 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8317
8318 // Local
8319 walkerParams.BlockResolution.x = threadSpaceWidth;
8320 walkerParams.BlockResolution.y = threadSpaceHeight;
8321 walkerParams.LocalStart.x = 0;
8322 walkerParams.LocalStart.y = 0;
8323 walkerParams.LocalOutLoopStride.x = 1;
8324 walkerParams.LocalOutLoopStride.y = 0;
8325 walkerParams.LocalInnerLoopUnit.x = -1;
8326 walkerParams.LocalInnerLoopUnit.y = 1;
8327
8328 // Mid
8329 walkerParams.MiddleLoopExtraSteps = 0;
8330 walkerParams.MidLoopUnitX = 0;
8331 walkerParams.MidLoopUnitY = 0;
8332 }
8333 else if (dependencyPattern == dependencyWavefront26Degree)
8334 {
8335 // Global
8336 walkerParams.GlobalResolution.x = threadSpaceWidth;
8337 walkerParams.GlobalResolution.y = threadSpaceHeight;
8338 walkerParams.GlobalStart.x = 0;
8339 walkerParams.GlobalStart.y = 0;
8340 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8341 walkerParams.GlobalOutlerLoopStride.y = 0;
8342 walkerParams.GlobalInnerLoopUnit.x = 0;
8343 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8344
8345 // Local
8346 walkerParams.BlockResolution.x = threadSpaceWidth;
8347 walkerParams.BlockResolution.y = threadSpaceHeight;
8348 walkerParams.LocalStart.x = 0;
8349 walkerParams.LocalStart.y = 0;
8350 walkerParams.LocalOutLoopStride.x = 1;
8351 walkerParams.LocalOutLoopStride.y = 0;
8352 walkerParams.LocalInnerLoopUnit.x = -2;
8353 walkerParams.LocalInnerLoopUnit.y = 1;
8354
8355 // Mid
8356 walkerParams.MiddleLoopExtraSteps = 0;
8357 walkerParams.MidLoopUnitX = 0;
8358 walkerParams.MidLoopUnitY = 0;
8359 }
8360 else if ((dependencyPattern == dependencyWavefront45XDegree) ||
8361 (dependencyPattern == dependencyWavefront45XDegreeAlt))
8362 {
8363 // Global
8364 walkerParams.GlobalResolution.x = threadSpaceWidth;
8365 walkerParams.GlobalResolution.y = threadSpaceHeight;
8366 walkerParams.GlobalStart.x = 0;
8367 walkerParams.GlobalStart.y = 0;
8368 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8369 walkerParams.GlobalOutlerLoopStride.y = 0;
8370 walkerParams.GlobalInnerLoopUnit.x = 0;
8371 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8372
8373 // Local
8374 walkerParams.BlockResolution.x = threadSpaceWidth;
8375 walkerParams.BlockResolution.y = threadSpaceHeight;
8376 walkerParams.LocalStart.x = 0;
8377 walkerParams.LocalStart.y = 0;
8378 walkerParams.LocalOutLoopStride.x = 1;
8379 walkerParams.LocalOutLoopStride.y = 0;
8380 walkerParams.LocalInnerLoopUnit.x = -1;
8381 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8382
8383 // Mid
8384 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8385 walkerParams.MidLoopUnitX = 0;
8386 walkerParams.MidLoopUnitY = 1;
8387 }
8388 else if ((dependencyPattern == dependencyWavefront26XDegree) ||
8389 (dependencyPattern == dependencyWavefront26XDegreeAlt))
8390 {
8391 // Global
8392 walkerParams.GlobalResolution.x = threadSpaceWidth;
8393 walkerParams.GlobalResolution.y = threadSpaceHeight;
8394 walkerParams.GlobalStart.x = 0;
8395 walkerParams.GlobalStart.y = 0;
8396 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8397 walkerParams.GlobalOutlerLoopStride.y = 0;
8398 walkerParams.GlobalInnerLoopUnit.x = 0;
8399 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8400
8401 // Local
8402 walkerParams.BlockResolution.x = threadSpaceWidth;
8403 walkerParams.BlockResolution.y = threadSpaceHeight;
8404 walkerParams.LocalStart.x = 0;
8405 walkerParams.LocalStart.y = 0;
8406 walkerParams.LocalOutLoopStride.x = 1;
8407 walkerParams.LocalOutLoopStride.y = 0;
8408 walkerParams.LocalInnerLoopUnit.x = -2;
8409 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8410
8411 // Mid
8412 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8413 walkerParams.MidLoopUnitX = 0;
8414 walkerParams.MidLoopUnitY = 1;
8415 }
8416 else if (dependencyPattern == dependencyWavefront45XVp9Degree)
8417 {
8418 // Global
8419 walkerParams.GlobalResolution.x = threadSpaceWidth;
8420 walkerParams.GlobalResolution.y = threadSpaceHeight;
8421 walkerParams.GlobalStart.x = 0;
8422 walkerParams.GlobalStart.y = 0;
8423 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8424 walkerParams.GlobalOutlerLoopStride.y = 0;
8425 walkerParams.GlobalInnerLoopUnit.x = 0;
8426 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8427
8428 // Local
8429 walkerParams.BlockResolution.x = threadSpaceWidth;
8430 walkerParams.BlockResolution.y = threadSpaceHeight;
8431 walkerParams.LocalStart.x = 0;
8432 walkerParams.LocalStart.y = 0;
8433 walkerParams.LocalOutLoopStride.x = 1;
8434 walkerParams.LocalOutLoopStride.y = 0;
8435 walkerParams.LocalInnerLoopUnit.x = -1;
8436 walkerParams.LocalInnerLoopUnit.y = 4;
8437
8438 // Mid
8439 walkerParams.MiddleLoopExtraSteps = 3;
8440 walkerParams.MidLoopUnitX = 0;
8441 walkerParams.MidLoopUnitY = 1;
8442 }
8443 else if (dependencyPattern == dependencyWavefront26ZDegree)
8444 {
8445 // Global
8446 walkerParams.GlobalResolution.x = threadSpaceWidth;
8447 walkerParams.GlobalResolution.y = threadSpaceHeight;
8448 walkerParams.GlobalStart.x = 0;
8449 walkerParams.GlobalStart.y = 0;
8450 walkerParams.GlobalOutlerLoopStride.x = 2;
8451 walkerParams.GlobalOutlerLoopStride.y = 0;
8452 walkerParams.GlobalInnerLoopUnit.x = -4;
8453 walkerParams.GlobalInnerLoopUnit.y = 2;
8454
8455 // Local
8456 walkerParams.BlockResolution.x = 2;
8457 walkerParams.BlockResolution.y = 2;
8458 walkerParams.LocalStart.x = 0;
8459 walkerParams.LocalStart.y = 0;
8460 walkerParams.LocalOutLoopStride.x = 0;
8461 walkerParams.LocalOutLoopStride.y = 1;
8462 walkerParams.LocalInnerLoopUnit.x = 1;
8463 walkerParams.LocalInnerLoopUnit.y = 0;
8464
8465 // Mid
8466 walkerParams.MiddleLoopExtraSteps = 0;
8467 walkerParams.MidLoopUnitX = 0;
8468 walkerParams.MidLoopUnitY = 0;
8469 }
8470 else if (dependencyPattern == dependencyWavefront26ZigDegree)
8471 {
8472 int32_t size_x = threadSpaceWidth; //(threadSpaceWidth + 1)>> 1;
8473 int32_t size_y = threadSpaceHeight; //threadSpaceHeight << 1;
8474
8475 // Global
8476 walkerParams.GlobalResolution.x = size_x;
8477 walkerParams.GlobalResolution.y = size_y;
8478 walkerParams.GlobalStart.x = 0;
8479 walkerParams.GlobalStart.y = 0;
8480 walkerParams.GlobalOutlerLoopStride.x = size_x;
8481 walkerParams.GlobalOutlerLoopStride.y = 0;
8482 walkerParams.GlobalInnerLoopUnit.x = 0;
8483 walkerParams.GlobalInnerLoopUnit.y = size_y;
8484
8485 // Local
8486 walkerParams.BlockResolution.x = size_x;
8487 walkerParams.BlockResolution.y = size_y;
8488 walkerParams.LocalStart.x = 0;
8489 walkerParams.LocalStart.y = 0;
8490 walkerParams.LocalOutLoopStride.x = 1;
8491 walkerParams.LocalOutLoopStride.y = 0;
8492 walkerParams.LocalInnerLoopUnit.x = -2;
8493 walkerParams.LocalInnerLoopUnit.y = 4;
8494
8495 // Mid
8496 walkerParams.MiddleLoopExtraSteps = 3;
8497 walkerParams.MidLoopUnitX = 0;
8498 walkerParams.MidLoopUnitY = 1;
8499 }
8500 else if (dependencyPattern == dependencyWavefront45DDegree)
8501 {
8502 // Global
8503 walkerParams.GlobalResolution.x = threadSpaceWidth;
8504 walkerParams.GlobalResolution.y = threadSpaceHeight;
8505 walkerParams.GlobalStart.x = 0;
8506 walkerParams.GlobalStart.y = 0;
8507 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8508 walkerParams.GlobalOutlerLoopStride.y = 0;
8509 walkerParams.GlobalInnerLoopUnit.x = 0;
8510 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8511
8512 // Local
8513 walkerParams.BlockResolution.x = threadSpaceWidth;
8514 walkerParams.BlockResolution.y = threadSpaceHeight;
8515 walkerParams.LocalStart.x = threadSpaceWidth;
8516 walkerParams.LocalStart.y = 0;
8517 walkerParams.LocalOutLoopStride.x = 1;
8518 walkerParams.LocalOutLoopStride.y = 0;
8519 walkerParams.LocalInnerLoopUnit.x = -1;
8520 walkerParams.LocalInnerLoopUnit.y = 1;
8521
8522 // Mid
8523 walkerParams.MiddleLoopExtraSteps = 0;
8524 walkerParams.MidLoopUnitX = 0;
8525 walkerParams.MidLoopUnitY = 0;
8526 if (colorCountMinusOne > 0)
8527 {
8528 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8529 }
8530 }
8531 else if (dependencyPattern == dependencyWavefront26DDegree)
8532 {
8533 // Global
8534 walkerParams.GlobalResolution.x = threadSpaceWidth;
8535 walkerParams.GlobalResolution.y = threadSpaceHeight;
8536 walkerParams.GlobalStart.x = 0;
8537 walkerParams.GlobalStart.y = 0;
8538 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8539 walkerParams.GlobalOutlerLoopStride.y = 0;
8540 walkerParams.GlobalInnerLoopUnit.x = 0;
8541 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8542 // Local
8543 walkerParams.BlockResolution.x = threadSpaceWidth;
8544 walkerParams.BlockResolution.y = threadSpaceHeight;
8545 walkerParams.LocalStart.x = threadSpaceWidth;
8546 walkerParams.LocalStart.y = 0;
8547 walkerParams.LocalOutLoopStride.x = 1;
8548 walkerParams.LocalOutLoopStride.y = 0;
8549 walkerParams.LocalInnerLoopUnit.x = -2;
8550 walkerParams.LocalInnerLoopUnit.y = 1;
8551 // Mid
8552 walkerParams.MiddleLoopExtraSteps = 0;
8553 walkerParams.MidLoopUnitX = 0;
8554 walkerParams.MidLoopUnitY = 0;
8555
8556 if (colorCountMinusOne > 0)
8557 {
8558 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8559 }
8560 }
8561 else if (dependencyPattern == dependencyWavefront45XDDegree)
8562 {
8563 // Global
8564 walkerParams.GlobalResolution.x = threadSpaceWidth;
8565 walkerParams.GlobalResolution.y = threadSpaceHeight;
8566 walkerParams.GlobalStart.x = 0;
8567 walkerParams.GlobalStart.y = 0;
8568 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8569 walkerParams.GlobalOutlerLoopStride.y = 0;
8570 walkerParams.GlobalInnerLoopUnit.x = 0;
8571 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8572
8573 // Local
8574 walkerParams.BlockResolution.x = threadSpaceWidth;
8575 walkerParams.BlockResolution.y = threadSpaceHeight;
8576 walkerParams.LocalStart.x = threadSpaceWidth;
8577 walkerParams.LocalStart.y = 0;
8578 walkerParams.LocalOutLoopStride.x = 1;
8579 walkerParams.LocalOutLoopStride.y = 0;
8580 walkerParams.LocalInnerLoopUnit.x = -1;
8581 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8582
8583 // Mid
8584 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8585 walkerParams.MidLoopUnitX = 0;
8586 walkerParams.MidLoopUnitY = 1;
8587 if (colorCountMinusOne > 0)
8588 {
8589 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8590 }
8591 }
8592 else if (dependencyPattern == dependencyWavefront26XDDegree)
8593 {
8594 // Global
8595 walkerParams.GlobalResolution.x = threadSpaceWidth;
8596 walkerParams.GlobalResolution.y = threadSpaceHeight;
8597 walkerParams.GlobalStart.x = 0;
8598 walkerParams.GlobalStart.y = 0;
8599 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
8600 walkerParams.GlobalOutlerLoopStride.y = 0;
8601 walkerParams.GlobalInnerLoopUnit.x = 0;
8602 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
8603 // Local
8604 walkerParams.BlockResolution.x = threadSpaceWidth;
8605 walkerParams.BlockResolution.y = threadSpaceHeight;
8606 walkerParams.LocalStart.x = threadSpaceWidth;
8607 walkerParams.LocalStart.y = 0;
8608 walkerParams.LocalOutLoopStride.x = 1;
8609 walkerParams.LocalOutLoopStride.y = 0;
8610 walkerParams.LocalInnerLoopUnit.x = -2;
8611 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
8612 // Mid
8613 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
8614 walkerParams.MidLoopUnitX = 0;
8615 walkerParams.MidLoopUnitY = 1;
8616
8617 if (colorCountMinusOne > 0)
8618 {
8619 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
8620 }
8621 }
8622 else
8623 {
8624 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported walking pattern is observed\n");
8625 eStatus = MOS_STATUS_INVALID_PARAMETER;
8626 }
8627 return eStatus;
8628 }
8629
IsDegree45Needed()8630 bool CodechalEncHevcStateG12::IsDegree45Needed()
8631 {
8632 if (m_numberConcurrentGroup == 1 && m_numberEncKernelSubThread == 1)
8633 {
8634 return false;
8635 }
8636 return true;
8637 }
8638
DecideConcurrentGroupAndWaveFrontNumber()8639 void CodechalEncHevcStateG12::DecideConcurrentGroupAndWaveFrontNumber()
8640 {
8641 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
8642 uint32_t widthInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1), shift);
8643 uint32_t heightInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1), shift);
8644 DependencyPattern walkerDegree;
8645
8646 //As per kernel ULT,for all non TU1 cases m_numberEncKernelSubThread should be set to 1
8647 // LCU32 has no multiple thread support,
8648 if (!m_isMaxLcu64 || m_hevcSeqParams->TargetUsage != 1)
8649 {
8650 m_numberEncKernelSubThread = 1; // LCU32 has no multiple thread support
8651 }
8652
8653 while (heightInLcu / m_numberConcurrentGroup == 0)
8654 {
8655 m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
8656 if (m_numberConcurrentGroup == 0)
8657 {
8658 // Try out all values and now have to use the default ones.
8659 // Concurrent group and wave-front split must be enabled together
8660 m_numberConcurrentGroup = 1;
8661 break;
8662 }
8663 }
8664
8665 if (m_numberConcurrentGroup > 1)
8666 {
8667 m_numWavefrontInOneRegion = 0;
8668 while (m_numWavefrontInOneRegion == 0)
8669 {
8670 uint32_t shift = m_degree45Needed ? 0 : 1;
8671
8672 m_numWavefrontInOneRegion =
8673 (widthInLcu + ((heightInLcu - 1) << shift) + m_numberConcurrentGroup - 1) / m_numberConcurrentGroup;
8674
8675 if (m_numWavefrontInOneRegion > 0)
8676 {
8677 // this is a valid setting and number of regisions is greater than or equal to 1
8678 break;
8679 }
8680 m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
8681 if (m_numberConcurrentGroup == 0)
8682 {
8683 // Try out all values and now have to use the default ones.
8684 m_numberConcurrentGroup = 1;
8685 break;
8686 }
8687 }
8688 }
8689 else
8690 {
8691 m_numWavefrontInOneRegion = 0;
8692 }
8693
8694 m_numberEncKernelSubThread = MOS_MIN(m_numberEncKernelSubThread, m_hevcThreadTaskDataNum);
8695
8696 return;
8697 }
8698
InitSwScoreBoardParams(CodechalEncodeSwScoreboard::KernelParams & swScoreboardKernelParames)8699 void CodechalEncHevcStateG12::InitSwScoreBoardParams(CodechalEncodeSwScoreboard::KernelParams &swScoreboardKernelParames)
8700 {
8701 uint32_t widthAlignedMaxLcu;
8702 uint32_t heightAlignedMaxLcu;
8703 uint32_t widthAlignedLcu32;
8704 uint32_t heightAlignedLcu32;
8705
8706 if (m_mfeEnabled && m_colorBitMfeEnabled)
8707 {
8708 widthAlignedMaxLcu = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxWidth, MAX_LCU_SIZE);
8709 heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxHeight, MAX_LCU_SIZE);
8710 widthAlignedLcu32 = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxWidth, 32);
8711 heightAlignedLcu32 = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxHeight, 32);
8712 }
8713 else
8714 {
8715 widthAlignedMaxLcu = m_widthAlignedMaxLcu;
8716 heightAlignedMaxLcu = m_heightAlignedMaxLcu;
8717 widthAlignedLcu32 = m_widthAlignedLcu32;
8718 heightAlignedLcu32 = m_heightAlignedLcu32;
8719 }
8720
8721 // SW scoreboard Kernel Call -- to be continued - DS + HME kernel call
8722 swScoreboardKernelParames.isHevc = false; // can be set to false. Need to enabled only for an optimization which is not needed for now
8723
8724 m_degree45Needed = true;
8725 if (m_hevcSeqParams->TargetUsage == 1)
8726 {
8727 m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU1, m_numberConcurrentGroup);
8728 // m_numberConcurrentGroup should default to 2 here for TU1. the only other value allowed from reg key will be 1
8729 m_degree45Needed = false;
8730 }
8731 else if (m_hevcSeqParams->TargetUsage == 4)
8732 {
8733 m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU4, m_numberConcurrentGroup);
8734 }
8735 DecideConcurrentGroupAndWaveFrontNumber();
8736
8737 DependencyPattern walkPattern;
8738 if (m_hevcSeqParams->TargetUsage == 1)
8739 {
8740 if (m_isMaxLcu64)
8741 {
8742 walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26XDegreeAlt : dependencyWavefront26XDDegree;
8743 }
8744 else
8745 {
8746 walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26Degree : dependencyWavefront26DDegree;
8747 }
8748 }
8749 else if (m_hevcSeqParams->TargetUsage == 4)
8750 {
8751 walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront45Degree : dependencyWavefront45DDegree;
8752 }
8753 else
8754 {
8755 walkPattern = dependencyWavefront45DDegree;
8756 }
8757 m_swScoreboardState->SetDependencyPattern(walkPattern);
8758
8759 if (m_isMaxLcu64)
8760 {
8761 if (m_hevcSeqParams->TargetUsage == 1)
8762 {
8763 swScoreboardKernelParames.scoreboardWidth = (widthAlignedMaxLcu >> 6);
8764 swScoreboardKernelParames.scoreboardHeight = (heightAlignedMaxLcu >> 6) * m_numberEncKernelSubThread;
8765 }
8766 else
8767 {
8768 swScoreboardKernelParames.scoreboardWidth = 2 * (widthAlignedMaxLcu >> 6);
8769 swScoreboardKernelParames.scoreboardHeight = 2 * (heightAlignedMaxLcu >> 6);
8770 }
8771 swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
8772 swScoreboardKernelParames.numberOfChildThread = m_numberEncKernelSubThread - 1; // child thread number is minus one of the total sub-thread for the main thread takes one.
8773 }
8774 else
8775 {
8776 swScoreboardKernelParames.scoreboardWidth = widthAlignedLcu32 >> 5;
8777 swScoreboardKernelParames.scoreboardHeight = heightAlignedLcu32 >> 5;
8778 swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
8779 swScoreboardKernelParames.numberOfChildThread = 0;
8780 }
8781
8782 swScoreboardKernelParames.swScoreboardSurfaceWidth = swScoreboardKernelParames.scoreboardWidth;
8783 swScoreboardKernelParames.swScoreboardSurfaceHeight = swScoreboardKernelParames.scoreboardHeight;
8784
8785 m_swScoreboardState->SetCurSwScoreboardSurfaceIndex(m_currRecycledBufIdx);
8786
8787 swScoreboardKernelParames.lcuInfoSurface = &m_lcuLevelInputDataSurface[m_currRecycledBufIdx];
8788 }
8789
UserFeatureKeyReport()8790 MOS_STATUS CodechalEncHevcStateG12::UserFeatureKeyReport()
8791 {
8792 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8793
8794 CODECHAL_ENCODE_FUNCTION_ENTER;
8795
8796 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport());
8797 #if (_DEBUG || _RELEASE_INTERNAL)
8798 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, m_numberConcurrentGroup, m_osInterface->pOsContext);
8799 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID, m_numberEncKernelSubThread, m_osInterface->pOsContext);
8800 CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
8801
8802 if (m_pakOnlyTest)
8803 {
8804 CodecHalEncode_WriteStringKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID, m_pakOnlyDataFolder, strlen(m_pakOnlyDataFolder), m_osInterface->pOsContext);
8805 }
8806 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
8807 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
8808 #endif
8809
8810 return eStatus;
8811 }
8812
SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams * params)8813 MOS_STATUS CodechalEncHevcStateG12::SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams *params)
8814 {
8815 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8816
8817 MEDIA_WA_TABLE* waTable = m_osInterface->pfnGetWaTable(m_osInterface);
8818 uint32_t memType = (MEDIA_IS_WA(waTable, WaForceAllocateLML4)) ? MOS_MEMPOOL_DEVICEMEMORY : 0;
8819
8820 if (Mos_ResourceIsNull(&m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource))
8821 {
8822 MOS_ZeroMemory(m_swScoreboardState->GetCurSwScoreboardSurface(), sizeof(*m_swScoreboardState->GetCurSwScoreboardSurface()));
8823
8824 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
8825 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8826 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
8827 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
8828 allocParamsForBuffer2D.Format = Format_R32U;
8829 allocParamsForBuffer2D.dwWidth = params->swScoreboardSurfaceWidth;
8830 allocParamsForBuffer2D.dwHeight = params->swScoreboardSurfaceHeight;
8831 allocParamsForBuffer2D.pBufName = "SW Scoreboard Init buffer";
8832 allocParamsForBuffer2D.dwMemType = memType;
8833
8834 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
8835 m_osInterface,
8836 &allocParamsForBuffer2D,
8837 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
8838
8839 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
8840 m_osInterface,
8841 m_swScoreboardState->GetCurSwScoreboardSurface()));
8842 }
8843
8844 if (m_swScoreboard == nullptr)
8845 {
8846 m_swScoreboard = (uint8_t *)MOS_AllocAndZeroMemory(params->scoreboardWidth * sizeof(uint32_t) * params->scoreboardHeight);
8847 InitSWScoreboard(m_swScoreboard, params->scoreboardWidth, params->scoreboardHeight, m_swScoreboardState->GetDependencyPattern(), (char)(params->numberOfChildThread));
8848 }
8849
8850 MOS_LOCK_PARAMS lockFlags;
8851
8852 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8853 lockFlags.WriteOnly = 1;
8854 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8855 m_osInterface,
8856 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource,
8857 &lockFlags);
8858 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8859
8860 for (uint32_t h = 0; h < params->scoreboardHeight; h++)
8861 {
8862 uint32_t s = params->scoreboardWidth * sizeof(uint32_t);
8863 MOS_SecureMemcpy(data, s, &m_swScoreboard[h * s], s);
8864 data += m_swScoreboardState->GetCurSwScoreboardSurface()->dwPitch;
8865 }
8866
8867 m_osInterface->pfnUnlockResource(
8868 m_osInterface,
8869 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
8870
8871 return eStatus;
8872 }
8873
SetDependency(uint8_t & numDependencies,char * scoreboardDeltaX,char * scoreboardDeltaY,uint32_t dependencyPattern,char childThreadNumber)8874 void CodechalEncHevcStateG12::SetDependency(
8875 uint8_t &numDependencies,
8876 char * scoreboardDeltaX,
8877 char * scoreboardDeltaY,
8878 uint32_t dependencyPattern,
8879 char childThreadNumber)
8880 {
8881 if (dependencyPattern == dependencyWavefrontHorizontal)
8882 {
8883 numDependencies = m_numDependencyHorizontal;
8884 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyHorizontal, m_dxWavefrontHorizontal, m_numDependencyHorizontal);
8885 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyHorizontal, m_dyWavefrontHorizontal, m_numDependencyHorizontal);
8886 }
8887 else if (dependencyPattern == dependencyWavefrontVertical)
8888 {
8889 numDependencies = m_numDependencyVertical;
8890 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyVertical, m_dxWavefrontVertical, m_numDependencyVertical);
8891 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyVertical, m_dyWavefrontVertical, m_numDependencyVertical);
8892 }
8893 else if (dependencyPattern == dependencyWavefront45Degree)
8894 {
8895 numDependencies = m_numDependency45Degree;
8896 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
8897 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
8898 }
8899 else if (dependencyPattern == dependencyWavefront26Degree ||
8900 dependencyPattern == dependencyWavefront26DDegree)
8901 {
8902 numDependencies = m_numDependency26Degree;
8903 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26Degree, m_dxWavefront26Degree, m_numDependency26Degree);
8904 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26Degree, m_dyWavefront26Degree, m_numDependency26Degree);
8905 }
8906 else if (dependencyPattern == dependencyWavefront45XDegree)
8907 {
8908 numDependencies = m_numDependency45xDegree;
8909 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegree, m_dxWavefront45xDegree, m_numDependency45xDegree);
8910 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegree, m_dyWavefront45xDegree, m_numDependency45xDegree);
8911 numDependencies = childThreadNumber + 2;
8912 scoreboardDeltaY[0] = childThreadNumber;
8913 }
8914 else if (dependencyPattern == dependencyWavefront26XDegree)
8915 {
8916 numDependencies = m_numDependency26xDegree;
8917 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegree, m_dxWavefront26xDegree, m_numDependency26xDegree);
8918 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegree, m_dyWavefront26xDegree, m_numDependency26xDegree);
8919 numDependencies = childThreadNumber + 3;
8920 scoreboardDeltaY[0] = childThreadNumber;
8921 }
8922 else if ((dependencyPattern == dependencyWavefront45XDegreeAlt) ||
8923 (dependencyPattern == dependencyWavefront45XDDegree))
8924 {
8925 numDependencies = m_numDependency45xDegreeAlt;
8926 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegreeAlt, m_dxWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
8927 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegreeAlt, m_dyWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
8928 scoreboardDeltaY[0] = childThreadNumber;
8929 }
8930 else if ((dependencyPattern == dependencyWavefront26XDegreeAlt) ||
8931 (dependencyPattern == dependencyWavefront26XDDegree))
8932 {
8933 numDependencies = m_numDependency26xDegreeAlt;
8934 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegreeAlt, m_dxWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
8935 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegreeAlt, m_dyWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
8936 scoreboardDeltaY[0] = childThreadNumber;
8937 }
8938 else if (dependencyPattern == dependencyWavefront45XVp9Degree)
8939 {
8940 numDependencies = m_numDependency45xVp9Degree;
8941 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xVp9Degree, m_dxWavefront45xVp9Degree, m_numDependency45xVp9Degree);
8942 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xVp9Degree, m_dyWavefront45xVp9Degree, m_numDependency45xVp9Degree);
8943 }
8944 else if (dependencyPattern == dependencyWavefront26ZDegree)
8945 {
8946 numDependencies = m_numDependency26zDegree;
8947 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26zDegree, m_dxWavefront26zDegree, m_numDependency26zDegree);
8948 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26zDegree, m_dyWavefront26zDegree, m_numDependency26zDegree);
8949 }
8950 else if (dependencyPattern == dependencyWavefront26ZigDegree)
8951 {
8952 numDependencies = m_numDependency26ZigDegree;
8953 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26ZigDegree, m_dxWavefront26ZigDegree, m_numDependency26ZigDegree);
8954 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26ZigDegree, m_dyWavefront26ZigDegree, m_numDependency26ZigDegree);
8955 }
8956 else if (dependencyPattern == dependencyWavefront45DDegree)
8957 {
8958 numDependencies = m_numDependency45Degree;
8959 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
8960 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
8961 }
8962 else
8963 {
8964 numDependencies = m_numDependencyNone;
8965 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyNone, m_dxWavefrontNone, m_numDependencyNone);
8966 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyNone, m_dyWavefrontNone, m_numDependencyNone);
8967 }
8968 }
8969
8970 // ========================================================================================
8971 // FUNCTION: InitSWScoreboard
8972 // DESCRIPTION: Initialize software scoreboard for a specific dependency pattern.
8973 // INPUTS: scoreboardWidth - Width of scoreboard in Entries
8974 // scoreboardHeight - Height of scoreboard in Entries
8975 // dependencyPattern - The Enumeration of the Dependency Pattern
8976 // OUTPUTS: scoreboard - Pointer to scoreboard in Memory
8977 // ========================================================================================
InitSWScoreboard(uint8_t * scoreboard,uint32_t scoreboardWidth,uint32_t scoreboardHeight,uint32_t dependencyPattern,char childThreadNumber)8978 void CodechalEncHevcStateG12::InitSWScoreboard(uint8_t *scoreboard, uint32_t scoreboardWidth, uint32_t scoreboardHeight, uint32_t dependencyPattern, char childThreadNumber)
8979 {
8980 // 1. Select Dependency Pattern
8981 uint8_t numDependencies = 0;
8982 char scoreboardDeltaX[m_maxNumDependency];
8983 char scoreboardDeltaY[m_maxNumDependency];
8984 memset(scoreboardDeltaX, 0, sizeof(scoreboardDeltaX));
8985 memset(scoreboardDeltaY, 0, sizeof(scoreboardDeltaY));
8986
8987 SetDependency(numDependencies, scoreboardDeltaX, scoreboardDeltaY, dependencyPattern, childThreadNumber);
8988
8989 // 2. Initialize scoreboard (CPU Based)
8990 int32_t dependentLocationX = 0;
8991 int32_t dependentLocationY = 0;
8992 uint32_t *scoreboardInDws = (uint32_t *)scoreboard;
8993 int32_t totalThreadNumber = childThreadNumber + 1;
8994 for (int32_t y = 0; y < (int32_t)scoreboardHeight; y += totalThreadNumber)
8995 {
8996 for (int32_t x = 0; x < (int32_t)scoreboardWidth; x++)
8997 {
8998 scoreboardInDws[y * scoreboardWidth + x] = 0;
8999
9000 // Add dependencies accordingly
9001 for (int32_t i = 0; i < numDependencies; i++)
9002 {
9003 dependentLocationX = x + scoreboardDeltaX[i];
9004 dependentLocationY = y + scoreboardDeltaY[i];
9005 if ((dependentLocationX < 0) || (dependentLocationY < 0) ||
9006 (dependentLocationX >= (int32_t)scoreboardWidth) ||
9007 (dependentLocationY >= (int32_t)scoreboardHeight))
9008 {
9009 // Do not add dependency because thread does not exist
9010 }
9011 else
9012 {
9013 scoreboardInDws[y * scoreboardWidth + x] |= (1 << i);
9014 }
9015 } // End NumDep
9016 } // End x
9017
9018 for (int32_t n = y + 1; n < y + totalThreadNumber; n++)
9019 {
9020 for (int32_t k = 0; k < (int32_t)scoreboardWidth; k++)
9021 {
9022 scoreboardInDws[n * scoreboardWidth + k] = scoreboardInDws[y * scoreboardWidth + k];
9023 }
9024 }
9025
9026 } // End y
9027 }
9028
//!
//! \brief    Allocate the Gen12 slice state, pipe mode select and pipe buffer address
//!           parameter objects and store them in the corresponding members
//!
void CodechalEncHevcStateG12::CreateMhwParams()
{
    // NOTE(review): the MOS_New results are not checked here, so users of these members
    // presumably have to cope with a failed allocation themselves - confirm.
    m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G12);
    m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12);
    m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
}
9035
CalculatePictureStateCommandSize()9036 MOS_STATUS CodechalEncHevcStateG12::CalculatePictureStateCommandSize()
9037 {
9038 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9039
9040 CODECHAL_ENCODE_FUNCTION_ENTER;
9041
9042 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
9043 CODECHAL_ENCODE_CHK_STATUS_RETURN(
9044 m_hwInterface->GetHxxStateCommandSize(
9045 CODECHAL_ENCODE_MODE_HEVC,
9046 &m_defaultPictureStatesSize,
9047 &m_defaultPicturePatchListSize,
9048 &stateCmdSizeParams));
9049
9050 return eStatus;
9051 }
9052
AddHcpPipeBufAddrCmd(PMOS_COMMAND_BUFFER cmdBuffer)9053 MOS_STATUS CodechalEncHevcStateG12::AddHcpPipeBufAddrCmd(
9054 PMOS_COMMAND_BUFFER cmdBuffer)
9055 {
9056 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9057
9058 CODECHAL_ENCODE_FUNCTION_ENTER;
9059
9060 *m_pipeBufAddrParams = {};
9061 SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
9062 #ifdef _MMC_SUPPORTED
9063 m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
9064 #endif
9065 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams));
9066
9067 return eStatus;
9068 }
9069
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 * tileCodingParams,uint32_t bitstreamBufSize)9070 MOS_STATUS CodechalEncHevcStateG12::SetTileData(
9071 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 *tileCodingParams,
9072 uint32_t bitstreamBufSize)
9073 {
9074 CODECHAL_ENCODE_FUNCTION_ENTER;
9075
9076 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9077
9078 if (!m_hevcPicParams->tiles_enabled_flag)
9079 {
9080 return eStatus;
9081 }
9082
9083 uint32_t colBd[100] = {0};
9084 uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
9085 for (uint32_t i = 0; i < num_tile_columns; i++)
9086 {
9087 colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
9088 }
9089
9090 uint32_t rowBd[100] = {0};
9091 uint32_t num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
9092 for (uint32_t i = 0; i < num_tile_rows; i++)
9093 {
9094 rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
9095 }
9096
9097 m_numTiles = num_tile_rows * num_tile_columns;
9098
9099 uint32_t const uiNumCuRecordTab[] = {1, 4, 16, 64}; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
9100 uint32_t numCuRecord = uiNumCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
9101 uint32_t bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
9102 int32_t frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
9103 int32_t frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
9104 int32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
9105 uint32_t NumLCUInPic = 0;
9106
9107 for (uint32_t i = 0; i < num_tile_rows; i++)
9108 {
9109 for (uint32_t j = 0; j < num_tile_columns; j++)
9110 {
9111 NumLCUInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
9112 }
9113 }
9114
9115 uint32_t numSliceInTile = 0;
9116 for (uint32_t uiNumLCUsInTiles = 0, i = 0; i < num_tile_rows; i++)
9117 {
9118 for (uint32_t j = 0; j < num_tile_columns; j++)
9119 {
9120 uint32_t idx = i * num_tile_columns + j;
9121 uint32_t numLCUInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
9122
9123 tileCodingParams[idx].TileStartLCUX = colBd[j];
9124 tileCodingParams[idx].TileStartLCUY = rowBd[i];
9125
9126 tileCodingParams[idx].TileColumnStoreSelect = j % 2;
9127 tileCodingParams[idx].TileRowStoreSelect = i % 2;
9128
9129 if (j != num_tile_columns - 1)
9130 {
9131 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
9132 tileCodingParams[idx].IsLastTileofRow = false;
9133 }
9134 else
9135 {
9136 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
9137 tileCodingParams[idx].IsLastTileofRow = true;
9138 }
9139
9140 if (i != num_tile_rows - 1)
9141 {
9142 tileCodingParams[idx].IsLastTileofColumn = false;
9143 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
9144 }
9145 else
9146 {
9147 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
9148 tileCodingParams[idx].IsLastTileofColumn = true;
9149 }
9150
9151 tileCodingParams[idx].NumOfTilesInFrame = m_numTiles;
9152 tileCodingParams[idx].NumOfTileColumnsInFrame = num_tile_columns;
9153 tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * uiNumLCUsInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
9154 CODECHAL_CACHELINE_SIZE) /
9155 CODECHAL_CACHELINE_SIZE;
9156 tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1;
9157
9158 tileCodingParams[idx].PakTileStatisticsOffset = m_sizeOfHcpPakFrameStats * idx / CODECHAL_CACHELINE_SIZE;
9159 tileCodingParams[idx].TileSizeStreamoutOffset = idx;
9160 tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
9161 tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource;
9162 tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
9163 tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
9164 tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
9165 tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
9166 tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
9167
9168 cuLevelStreamoutOffset += MOS_ALIGN_CEIL((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
9169 sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
9170 saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
9171 uint64_t totalSizeTemp = (uint64_t)bitstreamBufSize * (uint64_t)numLCUInTile;
9172 uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)NumLCUInPic) + ((totalSizeTemp % (uint64_t)NumLCUInPic) ? 1 : 0);
9173 bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
9174 uiNumLCUsInTiles += numLCUInTile;
9175
9176 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
9177 {
9178 bool lastSliceInTile = false, sliceInTile = false;
9179 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
9180 &tileCodingParams[idx],
9181 &sliceInTile,
9182 &lastSliceInTile));
9183 numSliceInTile += (sliceInTile ? 1 : 0);
9184 }
9185 }
9186 // same row store buffer for different tile rows.
9187 saoRowstoreOffset = 0;
9188 sseRowstoreOffset = 0;
9189 }
9190
9191 return eStatus;
9192 }
9193
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,bool * sliceInTile,bool * lastSliceInTile)9194 MOS_STATUS CodechalEncHevcStateG12::IsSliceInTile(
9195 uint32_t sliceNumber,
9196 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,
9197 bool * sliceInTile,
9198 bool * lastSliceInTile)
9199 {
9200 CODECHAL_ENCODE_FUNCTION_ENTER;
9201
9202 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9203
9204 CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
9205 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
9206 CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
9207
9208 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
9209 uint32_t residual = (1 << shift) - 1;
9210 uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
9211 uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
9212
9213 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
9214 uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
9215 uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
9216 uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
9217
9218 uint32_t tile_column_width = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
9219 uint32_t tile_row_height = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
9220 if (sliceLCUx < currentTile->TileStartLCUX ||
9221 sliceLCUy < currentTile->TileStartLCUY ||
9222 sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
9223 sliceLCUy >= currentTile->TileStartLCUY + tile_row_height)
9224 {
9225 // slice start is not in the tile boundary
9226 *lastSliceInTile = *sliceInTile = false;
9227 return eStatus;
9228 }
9229
9230 sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tile_column_width;
9231 sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tile_column_width;
9232
9233 if (sliceLCUx >= currentTile->TileStartLCUX + tile_column_width)
9234 {
9235 sliceLCUx -= tile_column_width;
9236 sliceLCUy++;
9237 }
9238
9239 if (sliceLCUx < currentTile->TileStartLCUX ||
9240 sliceLCUy < currentTile->TileStartLCUY ||
9241 sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
9242 sliceLCUy >= currentTile->TileStartLCUY + tile_row_height)
9243 {
9244 // last LCU of the slice is out of the tile boundary
9245 *lastSliceInTile = *sliceInTile = false;
9246 return eStatus;
9247 }
9248
9249 *sliceInTile = true;
9250
9251 sliceLCUx++;
9252 sliceLCUy++;
9253
9254 // the end of slice is at the boundary of tile
9255 *lastSliceInTile = (sliceLCUx == currentTile->TileStartLCUX + tile_column_width &&
9256 sliceLCUy == currentTile->TileStartLCUY + tile_row_height);
9257
9258 return eStatus;
9259 }
9260
AddHcpRefIdxCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)9261 MOS_STATUS CodechalEncHevcStateG12::AddHcpRefIdxCmd(
9262 PMOS_COMMAND_BUFFER cmdBuffer,
9263 PMHW_BATCH_BUFFER batchBuffer,
9264 PMHW_VDBOX_HEVC_SLICE_STATE params)
9265 {
9266 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9267
9268 CODECHAL_ENCODE_FUNCTION_ENTER;
9269
9270 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
9271 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
9272 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
9273
9274 if (cmdBuffer == nullptr && batchBuffer == nullptr)
9275 {
9276 CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
9277 return MOS_STATUS_NULL_POINTER;
9278 }
9279
9280 PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
9281 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
9282
9283 if (hevcSlcParams->slice_type != CODECHAL_ENCODE_HEVC_I_SLICE)
9284 {
9285 MHW_VDBOX_HEVC_REF_IDX_PARAMS_G12 refIdxParams;
9286
9287 refIdxParams.CurrPic = hevcPicParams->CurrReconstructedPic;
9288 refIdxParams.isEncode = true;
9289 refIdxParams.ucList = LIST_0;
9290 refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l0_active_minus1 + 1;
9291 eStatus = MOS_SecureMemcpy(&refIdxParams.RefPicList, sizeof(refIdxParams.RefPicList), &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList));
9292 if (eStatus != MOS_STATUS_SUCCESS)
9293 {
9294 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
9295 return eStatus;
9296 }
9297
9298 refIdxParams.hevcRefList = (void **)m_refList;
9299 refIdxParams.poc_curr_pic = hevcPicParams->CurrPicOrderCnt;
9300 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
9301 {
9302 refIdxParams.poc_list[i] = hevcPicParams->RefFramePOCList[i];
9303 }
9304
9305 refIdxParams.pRefIdxMapping = params->pRefIdxMapping;
9306 refIdxParams.RefFieldPicFlag = 0; // there is no interlaced support in encoder
9307 refIdxParams.RefBottomFieldFlag = 0; // there is no interlaced support in encoder
9308
9309 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
9310
9311 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
9312 {
9313 refIdxParams.ucList = LIST_1;
9314 refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l1_active_minus1 + 1;
9315 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
9316 }
9317 }
9318
9319 return eStatus;
9320 }
9321
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)9322 MOS_STATUS CodechalEncHevcStateG12::SendPrologWithFrameTracking(
9323 PMOS_COMMAND_BUFFER cmdBuffer,
9324 bool frameTrackingRequested,
9325 MHW_MI_MMIOREGISTERS *mmioRegister)
9326 {
9327 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9328
9329 MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
9330
9331 MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
9332 MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
9333 forceWakeupParams.bMFXPowerWellControl = false;
9334 forceWakeupParams.bMFXPowerWellControlMask = true;
9335 forceWakeupParams.bHEVCPowerWellControl = true;
9336 forceWakeupParams.bHEVCPowerWellControlMask = true;
9337 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
9338 cmdBuffer,
9339 &forceWakeupParams));
9340
9341 if (UseRenderCommandBuffer())
9342 {
9343 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
9344 return eStatus;
9345 }
9346
9347 #ifdef _MMC_SUPPORTED
9348 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
9349 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
9350 #endif
9351
9352 if (!IsLastPipe())
9353 {
9354 return eStatus;
9355 }
9356
9357 PMOS_COMMAND_BUFFER commandBufferInUse;
9358 if (m_realCmdBuffer.pCmdBase)
9359 {
9360 commandBufferInUse = &m_realCmdBuffer;
9361 }
9362 else if (cmdBuffer && cmdBuffer->pCmdBase)
9363 {
9364 commandBufferInUse = cmdBuffer;
9365 }
9366 else
9367 {
9368 eStatus = MOS_STATUS_INVALID_PARAMETER;
9369 return eStatus;
9370 }
9371
9372 // initialize command buffer attributes
9373 commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
9374 commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
9375 commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
9376 commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
9377 commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
9378
9379 if (frameTrackingRequested && m_frameTrackingEnabled)
9380 {
9381 commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
9382 commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
9383 &m_encodeStatusBuf.resStatusBuffer;
9384 commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
9385 // Set media frame tracking address offset(the offset from the encoder status buffer page)
9386 commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
9387 }
9388
9389 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
9390 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
9391 genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
9392 genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
9393 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
9394 genericPrologParams.dwStoreDataValue = m_storeData - 1;
9395
9396 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
9397
9398 return eStatus;
9399 }
9400
ResizeOnResChange()9401 void CodechalEncHevcStateG12::ResizeOnResChange()
9402 {
9403 CODECHAL_ENCODE_FUNCTION_ENTER;
9404
9405 CodechalEncoderState::ResizeOnResChange();
9406
9407 // need to re-allocate surfaces according to resolution
9408 m_swScoreboardState->ReleaseResources();
9409 }
9410
//!
//! \brief    Create the Gen12 HEVC encode MMC state object
//! \details  Only does work when _MMC_SUPPORTED is defined; otherwise it just returns success.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason (allocation failure)
//!
MOS_STATUS CodechalEncHevcStateG12::InitMmcState()
{
    CODECHAL_ENCODE_FUNCTION_ENTER;
#ifdef _MMC_SUPPORTED
    // The MMC state gets the HW interface and this encoder instance
    m_mmcState = MOS_New(CodechalMmcEncodeHevcG12, m_hwInterface, this);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
#endif
    return MOS_STATUS_SUCCESS;
}
9420
9421 #if USE_CODECHAL_DEBUG_TOOL
9422
9423 //MOS_STATUS CodechalEncHevcStateG12::CodecHal_DbgDumpHEVCMbEncCurbeG12(
9424 // CodechalDebugInterface *pDebugInterface,
9425 // CODECHAL_MEDIA_STATE_TYPE Function,
9426 // PMOS_RESOURCE presDBuffer)
9427 //{
9428 //#define WRITE_CURBE_FIELD_TO_FILE(field) {\
9429 // MOS_SecureStringPrint(sOutBuf, sizeof(sOutBuf), sizeof(sOutBuf), "field = %d\n", pCurbeData->field);\
9430 // CodecHal_DbgAddStringToBufferNewLine(&FileParams, sOutBuf);}
9431 //
9432 // PMOS_INTERFACE m_osInterface = nullptr;
9433 // PCCHAR pcFunction = nullptr;
9434 // char sAttrib[125];
9435 // char sOutBuf[MAX_FIELD_LENGTH];
9436 // CODECHAL_DBG_FILE_PARAMS FileParams;
9437 // MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9438 // MOS_LOCK_PARAMS LockFlags;
9439 // CodechalEncHevcStateG12::MBENC_COMBINED_BUFFER1 *pEncComBuf1 = nullptr;
9440 //
9441 // CODECHAL_DEBUG_FUNCTION_ENTER;
9442 //
9443 // CODECHAL_DEBUG_CHK_NULL(pDebugInterface);
9444 // CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pOsInterface);
9445 // CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pHwInterface);
9446 // m_osInterface = pDebugInterface->pOsInterface;
9447 //
9448 // pcFunction = CodecHal_DbgGetFunctionType(
9449 // pDebugInterface, Function, DBG_CMD_BUFFER_DUMP_DEFAULT);
9450 // CODECHAL_DEBUG_CHK_NULL(pcFunction);
9451 //
9452 // MOS_SecureStringPrint(sAttrib, sizeof(sAttrib), sizeof(sAttrib), "%s%s", pcFunction, CODECHAL_DBG_STRING_CURBE);
9453 //
9454 // MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
9455 // LockFlags.ReadOnly = 1;
9456 //
9457 // pEncComBuf1 = (CodechalEncHevcStateG12::MBENC_COMBINED_BUFFER1*)m_osInterface->pfnLockResource(
9458 // m_osInterface,
9459 // presDBuffer,
9460 // &LockFlags);
9461 //
9462 // FileParams = g_cInitDbgFileParams;
9463 //
9464 // if (!CodecHal_DbgAttribIsEnabled(pDebugInterface, sAttrib))
9465 // {
9466 // return eStatus;
9467 // }
9468 //
9469 // MOS_ZeroMemory(pDebugInterface->sPath, sizeof(pDebugInterface->sPath));
9470 //
9471 // CODECHAL_DEBUG_CHK_STATUS(CodecHal_DbgConstructFilenameString(
9472 // pDebugInterface,
9473 // pcFunction,
9474 // CODECHAL_DBG_STRING_CURBE,
9475 // CODECHAL_DBG_STRING_TXT));
9476 //
9477 // if (CodecHal_DbgAttribIsEnabled(pDebugInterface, CODECHAL_DBG_STRING_DUMPDATAINBINARY))
9478 // {
9479 // CODECHAL_DEBUG_CHK_STATUS(CodecHal_DbgDumpBufferInHexDwords(
9480 // pDebugInterface,
9481 // (uint8_t*)&pEncComBuf1->Curbe,
9482 // sizeof(pEncComBuf1->Curbe)));
9483 // }
9484 // else
9485 // {
9486 // CodechalEncHevcStateG12::MBENC_CURBE* pCurbeData = &pEncComBuf1->Curbe;
9487 //
9488 // FileParams.lRemaining = sizeof(char)* MAX_FIELD_LENGTH * MAX_NUM_ATTRIBUTES;
9489 // FileParams.psWriteToFile = (char*)MOS_AllocAndZeroMemory(FileParams.lRemaining);
9490 // CODECHAL_DEBUG_CHK_NULL(FileParams.psWriteToFile);
9491 // FileParams.dwOffset = 0;
9492 //
9493 // memset(sOutBuf, 0, sizeof(sOutBuf));
9494 //
9495 // MOS_SecureStringPrint(sOutBuf, sizeof(sOutBuf), sizeof(sOutBuf), "# CURBE Parameters:");
9496 // CodecHal_DbgAddStringToBufferNewLine(&FileParams, sOutBuf);
9497 //
9498 // WRITE_CURBE_FIELD_TO_FILE(FrameWidthInSamples);
9499 // WRITE_CURBE_FIELD_TO_FILE(FrameHeightInSamples);
9500 //
9501 // WRITE_CURBE_FIELD_TO_FILE(Log2MaxCUSize);
9502 // WRITE_CURBE_FIELD_TO_FILE(Log2MinCUSize);
9503 // WRITE_CURBE_FIELD_TO_FILE(Log2MaxTUSize);
9504 // WRITE_CURBE_FIELD_TO_FILE(Log2MinTUSize);
9505 // WRITE_CURBE_FIELD_TO_FILE(MaxIntraRdeIter);
9506 // WRITE_CURBE_FIELD_TO_FILE(QPType);
9507 // WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthInter);
9508 // WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthIntra);
9509 // WRITE_CURBE_FIELD_TO_FILE(Log2ParallelMergeLevel);
9510 //
9511 // WRITE_CURBE_FIELD_TO_FILE(CornerNeighborPixel);
9512 // WRITE_CURBE_FIELD_TO_FILE(IntraNeighborAvailFlags);
9513 // WRITE_CURBE_FIELD_TO_FILE(ChromaFormatType);
9514 // WRITE_CURBE_FIELD_TO_FILE(SubPelMode);
9515 // WRITE_CURBE_FIELD_TO_FILE(InterSADMeasure);
9516 // WRITE_CURBE_FIELD_TO_FILE(IntraSADMeasure);
9517 // WRITE_CURBE_FIELD_TO_FILE(IntraPrediction);
9518 // WRITE_CURBE_FIELD_TO_FILE(RefIDCostMode);
9519 // WRITE_CURBE_FIELD_TO_FILE(TUBasedCostSetting);
9520 //
9521 // WRITE_CURBE_FIELD_TO_FILE(ExplictModeEn);
9522 // WRITE_CURBE_FIELD_TO_FILE(AdaptiveEn);
9523 // WRITE_CURBE_FIELD_TO_FILE(EarlyImeSuccessEn);
9524 // WRITE_CURBE_FIELD_TO_FILE(IntraSpeedMode);
9525 // WRITE_CURBE_FIELD_TO_FILE(IMECostCentersSel);
9526 // WRITE_CURBE_FIELD_TO_FILE(RDEQuantRoundValue);
9527 // WRITE_CURBE_FIELD_TO_FILE(IMERefWindowSize);
9528 // WRITE_CURBE_FIELD_TO_FILE(IntraComputeType);
9529 // WRITE_CURBE_FIELD_TO_FILE(Depth0IntraPredition);
9530 // WRITE_CURBE_FIELD_TO_FILE(TUDepthControl);
9531 // WRITE_CURBE_FIELD_TO_FILE(IntraTuRecFeedbackDisable);
9532 // WRITE_CURBE_FIELD_TO_FILE(MergeListBiDisable);
9533 // WRITE_CURBE_FIELD_TO_FILE(EarlyImeStop);
9534 //
9535 // WRITE_CURBE_FIELD_TO_FILE(FrameQP);
9536 // WRITE_CURBE_FIELD_TO_FILE(FrameQPSign);
9537 // WRITE_CURBE_FIELD_TO_FILE(ConcurrentGroupNum);
9538 // WRITE_CURBE_FIELD_TO_FILE(NumofUnitInWaveFront);
9539 //
9540 // WRITE_CURBE_FIELD_TO_FILE(LoadBalenceEnable);
9541 // WRITE_CURBE_FIELD_TO_FILE(NumberofMultiFrame);
9542 // WRITE_CURBE_FIELD_TO_FILE(Degree45);
9543 // WRITE_CURBE_FIELD_TO_FILE(Break12Dependency);
9544 // WRITE_CURBE_FIELD_TO_FILE(ThreadNumber);
9545 //
9546 // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_B);
9547 // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_P);
9548 // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_I);
9549 //
9550 // WRITE_CURBE_FIELD_TO_FILE(NumofRowTile);
9551 // WRITE_CURBE_FIELD_TO_FILE(NumofColumnTile);
9552 //
9553 // WRITE_CURBE_FIELD_TO_FILE(TransquantBypassEnableFlag);
9554 // WRITE_CURBE_FIELD_TO_FILE(PCMEnabledFlag);
9555 // WRITE_CURBE_FIELD_TO_FILE(CuQpDeltaEnabledFlag);
9556 // WRITE_CURBE_FIELD_TO_FILE(Stepping);
9557 // WRITE_CURBE_FIELD_TO_FILE(WaveFrontSplitsEnable);
9558 // WRITE_CURBE_FIELD_TO_FILE(HMEFlag);
9559 // WRITE_CURBE_FIELD_TO_FILE(SuperHME);
9560 // WRITE_CURBE_FIELD_TO_FILE(UltraHME);
9561 // WRITE_CURBE_FIELD_TO_FILE(Cu64SkipCheckOnly);
9562 // WRITE_CURBE_FIELD_TO_FILE(EnableCu64Check);
9563 // WRITE_CURBE_FIELD_TO_FILE(Cu642Nx2NCheckOnly);
9564 // WRITE_CURBE_FIELD_TO_FILE(EnableCu64AmpCheck);
9565 // WRITE_CURBE_FIELD_TO_FILE(DisablePIntra);
9566 // WRITE_CURBE_FIELD_TO_FILE(DisableIntraTURec);
9567 // WRITE_CURBE_FIELD_TO_FILE(InheritIntraModeFromTU0);
9568 // WRITE_CURBE_FIELD_TO_FILE(CostScalingForRA);
9569 // WRITE_CURBE_FIELD_TO_FILE(DisableIntraNxN);
9570 //
9571 // WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL0);
9572 // WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL1);
9573 // WRITE_CURBE_FIELD_TO_FILE(MaxBRefIdxL0);
9574 //
9575 // WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermination);
9576 // WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermSize);
9577 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Enable);
9578 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Order);
9579 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Th);
9580 // WRITE_CURBE_FIELD_TO_FILE(DynamicOrderTh);
9581 // WRITE_CURBE_FIELD_TO_FILE(PerBFrameQPOffset);
9582 // WRITE_CURBE_FIELD_TO_FILE(IncreaseExitThresh);
9583 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Min32);
9584 // WRITE_CURBE_FIELD_TO_FILE(LastFrameIsIntra);
9585 //
9586 // WRITE_CURBE_FIELD_TO_FILE(LenSP);
9587 // WRITE_CURBE_FIELD_TO_FILE(MaxNumSU);
9588 //
9589 // WRITE_CURBE_FIELD_TO_FILE(CostTableIndex);
9590 //
9591 // WRITE_CURBE_FIELD_TO_FILE(SliceType);
9592 // WRITE_CURBE_FIELD_TO_FILE(TemporalMvpEnableFlag);
9593 // WRITE_CURBE_FIELD_TO_FILE(CollocatedFromL0Flag);
9594 // WRITE_CURBE_FIELD_TO_FILE(theSameRefList);
9595 // WRITE_CURBE_FIELD_TO_FILE(IsLowDelay);
9596 // WRITE_CURBE_FIELD_TO_FILE(MaxNumMergeCand);
9597 // WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL0);
9598 // WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL1);
9599 //
9600 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_0);
9601 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_0);
9602 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_1);
9603 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_1);
9604 //
9605 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_2);
9606 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_2);
9607 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_3);
9608 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_3);
9609 //
9610 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_4);
9611 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_4);
9612 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_5);
9613 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_5);
9614 //
9615 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_6);
9616 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_6);
9617 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_7);
9618 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_7);
9619 //
9620 // WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L0);
9621 // WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L1);
9622 //
9623 // WRITE_CURBE_FIELD_TO_FILE(RefFrameWinWidth);
9624 // WRITE_CURBE_FIELD_TO_FILE(RefFrameWinHeight);
9625 //
9626 // WRITE_CURBE_FIELD_TO_FILE(RoundingInter);
9627 // WRITE_CURBE_FIELD_TO_FILE(RoundingIntra);
9628 // WRITE_CURBE_FIELD_TO_FILE(MaxThreadWidth);
9629 // WRITE_CURBE_FIELD_TO_FILE(MaxThreadHeight);
9630 //
9631 // CODECHAL_DEBUG_CHK_STATUS(MosUtilities::MosWriteFileFromPtr(
9632 // pDebugInterface->sPath,
9633 // FileParams.psWriteToFile,
9634 // FileParams.dwOffset));
9635 // }
9636 //
9637 //finish:
9638 // if (m_osInterface && pEncComBuf1)
9639 // {
9640 // m_osInterface->pfnUnlockResource(
9641 // m_osInterface,
9642 // presDBuffer);
9643 // }
9644 //
9645 // if (FileParams.psWriteToFile)
9646 // {
9647 // MOS_FreeMemory(FileParams.psWriteToFile);
9648 // }
9649 // return eStatus;
9650 //}
9651
9652 #endif
VerifyCommandBufferSize()9653 MOS_STATUS CodechalEncHevcStateG12::VerifyCommandBufferSize()
9654 {
9655 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9656
9657 CODECHAL_ENCODE_FUNCTION_ENTER;
9658
9659 if (UseRenderCommandBuffer() || m_numPipe == 1)
9660 {
9661 // legacy mode & resize CommandBuffer Size for every BRC pass
9662 if (!m_singleTaskPhaseSupported)
9663 {
9664 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
9665 }
9666 return eStatus;
9667 }
9668
9669 // virtual engine
9670 uint32_t requestedSize =
9671 m_pictureStatesSize +
9672 m_extraPictureStatesSize +
9673 (m_sliceStatesSize * m_numSlices);
9674
9675 requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
9676
9677 // Running in the multiple VDBOX mode
9678 int currentPipe = GetCurrentPipe();
9679 if (currentPipe < 0 || currentPipe >= m_numPipe)
9680 {
9681 eStatus = MOS_STATUS_INVALID_PARAMETER;
9682 return eStatus;
9683 }
9684 int currentPass = GetCurrentPass();
9685 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9686 {
9687 eStatus = MOS_STATUS_INVALID_PARAMETER;
9688 return eStatus;
9689 }
9690
9691 if (IsFirstPipe() && m_osInterface->bUsesPatchList)
9692 {
9693 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
9694 }
9695
9696 PMOS_COMMAND_BUFFER pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
9697
9698 if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) ||
9699 m_sizeOfVeBatchBuffer < requestedSize)
9700 {
9701 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
9702
9703 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9704 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
9705 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
9706 allocParamsForBufferLinear.Format = Format_Buffer;
9707 allocParamsForBufferLinear.dwBytes = requestedSize;
9708 allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
9709
9710 if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource))
9711 {
9712 if (pCmdBuffer->pCmdBase)
9713 {
9714 m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource);
9715 }
9716 m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource);
9717 }
9718
9719 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
9720 m_osInterface,
9721 &allocParamsForBufferLinear,
9722 &pCmdBuffer->OsResource));
9723
9724 m_sizeOfVeBatchBuffer = requestedSize;
9725 }
9726
9727 if (pCmdBuffer->pCmdBase == nullptr)
9728 {
9729 MOS_LOCK_PARAMS lockParams;
9730 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
9731 lockParams.WriteOnly = true;
9732 pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams);
9733 pCmdBuffer->iRemaining = m_sizeOfVeBatchBuffer;
9734 pCmdBuffer->iOffset = 0;
9735
9736 if (pCmdBuffer->pCmdBase == nullptr)
9737 {
9738 eStatus = MOS_STATUS_NULL_POINTER;
9739 return eStatus;
9740 }
9741 }
9742
9743 return eStatus;
9744 }
9745
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)9746 MOS_STATUS CodechalEncHevcStateG12::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
9747 {
9748 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9749
9750 CODECHAL_ENCODE_FUNCTION_ENTER;
9751
9752 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9753 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
9754
9755 if (UseRenderCommandBuffer() || m_numPipe == 1)
9756 {
9757 // legacy mode
9758 m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
9759 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
9760 return eStatus;
9761 }
9762
9763 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
9764
9765 int currentPipe = GetCurrentPipe();
9766 if (currentPipe < 0 || currentPipe >= m_numPipe)
9767 {
9768 eStatus = MOS_STATUS_INVALID_PARAMETER;
9769 return eStatus;
9770 }
9771 int currentPass = GetCurrentPass();
9772 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9773 {
9774 eStatus = MOS_STATUS_INVALID_PARAMETER;
9775 return eStatus;
9776 }
9777
9778 *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
9779
9780 if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
9781 {
9782 // Insert CP Prolog
9783 CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
9784 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
9785 }
9786 return eStatus;
9787 }
9788
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)9789 MOS_STATUS CodechalEncHevcStateG12::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
9790 {
9791 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9792
9793 CODECHAL_ENCODE_FUNCTION_ENTER;
9794
9795 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9796
9797 if (UseRenderCommandBuffer() || m_numPipe == 1)
9798 {
9799 // legacy mode
9800 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
9801 return eStatus;
9802 }
9803
9804 int currentPipe = GetCurrentPipe();
9805 if (currentPipe < 0 || currentPipe >= m_numPipe)
9806 {
9807 eStatus = MOS_STATUS_INVALID_PARAMETER;
9808 return eStatus;
9809 }
9810 int currentPass = GetCurrentPass();
9811 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9812 {
9813 eStatus = MOS_STATUS_INVALID_PARAMETER;
9814 return eStatus;
9815 }
9816 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
9817 m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
9818 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
9819
9820 return eStatus;
9821 }
9822
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)9823 MOS_STATUS CodechalEncHevcStateG12::SubmitCommandBuffer(
9824 PMOS_COMMAND_BUFFER cmdBuffer,
9825 bool bNullRendering)
9826 {
9827 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9828
9829 CODECHAL_ENCODE_FUNCTION_ENTER;
9830
9831 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9832
9833 if (UseRenderCommandBuffer() || m_numPipe == 1)
9834 {
9835 // legacy mode
9836 if (!UseRenderCommandBuffer()) // Set VE Hints for video contexts only
9837 {
9838 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
9839 }
9840
9841 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
9842 return eStatus;
9843 }
9844
9845 bool cmdBufferReadyForSubmit = IsLastPipe();
9846
9847 // In STF, Hold the command buffer submission till last pass
9848 if (m_singleTaskPhaseSupported)
9849 {
9850 cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
9851 }
9852
9853 if (!cmdBufferReadyForSubmit)
9854 {
9855 return eStatus;
9856 }
9857
9858 int currentPass = GetCurrentPass();
9859 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9860 {
9861 eStatus = MOS_STATUS_INVALID_PARAMETER;
9862 return eStatus;
9863 }
9864 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
9865
9866 for (uint32_t i = 0; i < m_numPipe; i++)
9867 {
9868 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
9869
9870 if (cmdBuffer->pCmdBase)
9871 {
9872 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
9873 }
9874
9875 cmdBuffer->pCmdBase = 0;
9876 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
9877 }
9878 m_sizeOfVeBatchBuffer = 0;
9879
9880 if (eStatus == MOS_STATUS_SUCCESS)
9881 {
9882 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
9883 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
9884 }
9885
9886 return eStatus;
9887 }
9888
SetSliceStructs()9889 MOS_STATUS CodechalEncHevcStateG12::SetSliceStructs()
9890 {
9891 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9892 eStatus = CodechalEncodeHevcBase::SetSliceStructs();
9893 m_numPassesInOnePipe = m_numPasses;
9894 m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
9895 return eStatus;
9896 }
9897
AllocateTileStatistics()9898 MOS_STATUS CodechalEncHevcStateG12::AllocateTileStatistics()
9899 {
9900 CODECHAL_ENCODE_FUNCTION_ENTER;
9901
9902 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9903
9904 if (!m_hevcPicParams->tiles_enabled_flag)
9905 {
9906 return eStatus;
9907 }
9908
9909 auto num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
9910 auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
9911 auto num_tiles = num_tile_rows * num_tile_columns;
9912
9913 MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
9914 MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
9915 MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
9916
9917 MOS_LOCK_PARAMS lockFlagsWriteOnly;
9918 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
9919 lockFlagsWriteOnly.WriteOnly = true;
9920
9921 // Set the maximum size based on frame level statistics.
9922 m_hevcStatsSize.uiTileSizeRecord = CODECHAL_CACHELINE_SIZE;
9923 m_hevcStatsSize.uiHevcPakStatistics = m_sizeOfHcpPakFrameStats;
9924 m_hevcStatsSize.uiVdencStatistics = 0;
9925 m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
9926
9927 // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
9928 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
9929 m_hevcFrameStatsOffset.uiTileSizeRecord = 0; // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
9930 m_hevcFrameStatsOffset.uiHevcPakStatistics = 0;
9931 m_hevcFrameStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
9932 m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
9933
9934 // Frame level statistics
9935 m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6), CODECHAL_PAGE_SIZE);
9936
9937 // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
9938 if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
9939 {
9940 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
9941 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9942 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
9943 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
9944 allocParamsForBufferLinear.Format = Format_Buffer;
9945 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
9946 allocParamsForBufferLinear.pBufName = "GEN11 HCP Aggregated Frame Statistics Streamout Buffer";
9947
9948 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
9949 m_osInterface,
9950 &allocParamsForBufferLinear,
9951 &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
9952 m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
9953
9954 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
9955 m_osInterface,
9956 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
9957 &lockFlagsWriteOnly);
9958
9959 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
9960 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
9961 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
9962 }
9963
9964 // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
9965 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
9966 m_hevcTileStatsOffset.uiTileSizeRecord = 0; // TileReord is in a separated resource
9967 m_hevcTileStatsOffset.uiHevcPakStatistics = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer
9968 m_hevcTileStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
9969 m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
9970 // Combined statistics size for all tiles
9971 m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6, CODECHAL_PAGE_SIZE);
9972
9973 // Tile size record size for all tiles
9974 m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
9975
9976 if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
9977 {
9978 if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
9979 {
9980 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
9981 }
9982 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
9983 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9984 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
9985 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
9986 allocParamsForBufferLinear.Format = Format_Buffer;
9987 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
9988 allocParamsForBufferLinear.pBufName = "GEN11 HCP Tile Level Statistics Streamout Buffer";
9989
9990 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
9991 m_osInterface,
9992 &allocParamsForBufferLinear,
9993 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
9994 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
9995
9996 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
9997 m_osInterface,
9998 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
9999 &lockFlagsWriteOnly);
10000 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
10001
10002 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
10003 m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
10004 }
10005
10006 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
10007 {
10008 if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
10009 {
10010 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
10011 }
10012 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
10013 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
10014 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
10015 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
10016 allocParamsForBufferLinear.Format = Format_Buffer;
10017 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
10018 allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
10019
10020 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
10021 m_osInterface,
10022 &allocParamsForBufferLinear,
10023 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource));
10024 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize;
10025
10026 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
10027 m_osInterface,
10028 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
10029 &lockFlagsWriteOnly);
10030 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
10031
10032 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
10033 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
10034 }
10035
10036 return eStatus;
10037 }
10038
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)10039 void CodechalEncHevcStateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS &pipeBufAddrParams)
10040 {
10041 CODECHAL_ENCODE_FUNCTION_ENTER;
10042
10043 CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
10044
10045 // SAO Row Store is GEN12 specific
10046 pipeBufAddrParams.presSaoRowStoreBuffer = &m_SAORowStoreBuffer;
10047
10048 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
10049 if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
10050 {
10051 pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource;
10052 pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
10053 pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
10054 pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
10055 }
10056 }
10057
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)10058 MOS_STATUS CodechalEncHevcStateG12::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
10059 {
10060 CODECHAL_ENCODE_FUNCTION_ENTER;
10061
10062 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10063
10064 if (!m_sseEnabled)
10065 {
10066 return eStatus;
10067 }
10068
10069 // encodeStatus is offset by 2 DWs in the resource
10070 uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
10071 for (auto i = 0; i < 6; i++) // 64 bit SSE values for luma/ chroma channels need to be copied
10072 {
10073 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
10074 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
10075 miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
10076 miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
10077 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
10078 miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
10079 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
10080 }
10081 return eStatus;
10082 }
10083
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)10084 void CodechalEncHevcStateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS &indObjBaseAddrParams)
10085 {
10086 PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBbIndex];
10087 bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
10088
10089 MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
10090 indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
10091 indObjBaseAddrParams.presMvObjectBuffer = IsPanicModePass() ? &m_skipFrameInfo.m_resMbCodeSkipFrameSurface : &m_resMbCodeSurface;
10092 indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
10093 indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
10094 indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
10095 indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
10096 indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
10097 indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
10098 indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
10099 }
10100
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)10101 MOS_STATUS CodechalEncHevcStateG12::UpdateCmdBufAttribute(
10102 PMOS_COMMAND_BUFFER cmdBuffer,
10103 bool renderEngineInUse)
10104 {
10105 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10106
10107 // should not be there. Will remove it in the next change
10108 CODECHAL_ENCODE_FUNCTION_ENTER;
10109 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
10110 {
10111 PMOS_CMD_BUF_ATTRI_VE attriExt =
10112 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
10113
10114 memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
10115 attriExt->bUseVirtualEngineHint =
10116 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
10117 }
10118
10119 return eStatus;
10120 }
10121
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)10122 MOS_STATUS CodechalEncHevcStateG12::SetAndPopulateVEHintParams(
10123 PMOS_COMMAND_BUFFER cmdBuffer)
10124 {
10125 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10126
10127 CODECHAL_ENCODE_FUNCTION_ENTER;
10128
10129 if (!MOS_VE_SUPPORTED(m_osInterface))
10130 {
10131 return eStatus;
10132 }
10133
10134 CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
10135 MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
10136
10137 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
10138 {
10139 scalSetParms.bNeedSyncWithPrevious = true;
10140 }
10141
10142 if (m_numPipe >= 2)
10143 {
10144 int32_t currentPass = GetCurrentPass();
10145 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
10146 {
10147 eStatus = MOS_STATUS_INVALID_PARAMETER;
10148 return eStatus;
10149 }
10150
10151 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
10152 for (auto i = 0; i < m_numPipe; i++)
10153 {
10154 scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
10155 }
10156 }
10157
10158 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
10159 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
10160 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
10161
10162 return eStatus;
10163 }
10164
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)10165 MOS_STATUS CodechalEncHevcStateG12::AddMediaVfeCmd(
10166 PMOS_COMMAND_BUFFER cmdBuffer,
10167 SendKernelCmdsParams *params)
10168 {
10169 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
10170
10171 MHW_VFE_PARAMS_G12 vfeParams = {};
10172 vfeParams.pKernelState = params->pKernelState;
10173 vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
10174 vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads;
10175 vfeParams.bFusedEuDispatch = false; // legacy mode
10176
10177 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
10178
10179 return MOS_STATUS_SUCCESS;
10180 }
10181
10182 #if USE_CODECHAL_DEBUG_TOOL
DumpFrameStatsBuffer(CodechalDebugInterface * debugInterface)10183 MOS_STATUS CodechalEncHevcStateG12::DumpFrameStatsBuffer(CodechalDebugInterface *debugInterface)
10184 {
10185 CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface);
10186
10187 PMOS_RESOURCE resBuffer = &m_resFrameStatStreamOutBuffer;
10188 uint32_t offset = 0;
10189 uint32_t num_tiles = 1;
10190 //In scalable mode, HEVC PAK Frame Statistics gets dumped out for each tile
10191 if (m_numPipe > 1)
10192 {
10193 resBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
10194 offset = m_hevcTileStatsOffset.uiHevcPakStatistics;
10195 num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
10196 }
10197 uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * num_tiles, CODECHAL_CACHELINE_SIZE);
10198
10199 CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
10200 resBuffer,
10201 CodechalDbgAttr::attrFrameState,
10202 "FrameStatus",
10203 size,
10204 offset,
10205 CODECHAL_NUM_MEDIA_STATES));
10206
10207 return MOS_STATUS_SUCCESS;
10208 }
10209
DumpPakOutput()10210 MOS_STATUS CodechalEncHevcStateG12::DumpPakOutput()
10211 {
10212 std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
10213
10214 CODECHAL_DEBUG_TOOL(
10215 int32_t currentPass = GetCurrentPass();
10216 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10217 &m_resPakcuLevelStreamoutData.sResource,
10218 CodechalDbgAttr::attrCUStreamout,
10219 currPassName.data(),
10220 m_resPakcuLevelStreamoutData.dwSize,
10221 0,
10222 CODECHAL_NUM_MEDIA_STATES));
10223 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10224 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
10225 CodechalDbgAttr::attrTileBasedStats,
10226 currPassName.data(),
10227 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
10228 0,
10229 CODECHAL_NUM_MEDIA_STATES));
10230 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10231 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite],
10232 CodechalDbgAttr::attrBrcPakStats,
10233 currPassName.data(),
10234 m_hevcBrcPakStatisticsSize,
10235 0,
10236 CODECHAL_NUM_MEDIA_STATES));
10237 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10238 &m_HucStitchCmdBatchBuffer.OsResource,
10239 CodechalDbgAttr::attr2ndLvlBatchMfx,
10240 currPassName.data(),
10241 m_hwInterface->m_HucStitchCmdBatchBufferSize,
10242 0,
10243 CODECHAL_NUM_MEDIA_STATES));
10244 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
10245 &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass],
10246 CodechalDbgAttr::attrHuCStitchDataBuf,
10247 currPassName.data(),
10248 sizeof(HucCommandData),
10249 0,
10250 CODECHAL_NUM_MEDIA_STATES));
10251 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
10252 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
10253 sizeof(HucPakStitchDmemEncG12),
10254 currentPass,
10255 hucRegionDumpPakIntegrate));)
10256
10257 return MOS_STATUS_SUCCESS;
10258 }
10259 #endif
10260
EncodeMeKernel()10261 MOS_STATUS CodechalEncHevcStateG12::EncodeMeKernel()
10262 {
10263 CODECHAL_ENCODE_FUNCTION_ENTER;
10264
10265 if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled())
10266 {
10267 CodechalKernelHme::CurbeParam curbeParam;
10268 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbeParams(curbeParam));
10269
10270 CodechalKernelHme::SurfaceParams surfaceParam;
10271 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeSurfaceParams(surfaceParam));
10272
10273 m_hmeKernel->setnoMEKernelForPFrame(m_lowDelay);
10274
10275 if (m_hmeKernel->Is16xMeEnabled())
10276 {
10277 if (m_hmeKernel->Is32xMeEnabled())
10278 {
10279 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb32x;
10280 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb32x;
10281 surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
10282 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x));
10283 }
10284 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb16x;
10285 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
10286 surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
10287 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x));
10288 }
10289 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb4x;
10290 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb4x;
10291 surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset;
10292 surfaceParam.meBrcDistortionSurface = m_brcBuffers.meBrcDistortionSurface;
10293
10294 curbeParam.sumMVThreshold = m_sumMVThreshold;
10295
10296 m_lastTaskInPhase = true;
10297
10298 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x));
10299 }
10300
10301 return MOS_STATUS_SUCCESS;
10302 }
10303
ResizeBufferOffset()10304 void CodechalEncHevcStateG12::ResizeBufferOffset()
10305 {
10306 CODECHAL_ENCODE_FUNCTION_ENTER;
10307
10308 uint32_t size = 0;
10309 uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
10310 MBENC_COMBINED_BUFFER2 fixedBuf;
10311
10312 //Re-Calculate m_encBCombinedBuffer2 Size and Offsets
10313 m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
10314 m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
10315
10316 size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize;
10317
10318 m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
10319 m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
10320 }
10321