1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_hevc_g11.cpp
24 //! \brief HEVC dual-pipe encoder for GEN11.
25 //!
26
27 #include "codechal_encode_hevc_g11.h"
28 #include "codechal_encode_csc_ds_g11.h"
29 #include "codechal_encode_wp_g11.h"
30 #include "codechal_kernel_header_g11.h"
31 #include "codechal_kernel_hme_g11.h"
32 #ifndef _FULL_OPEN_SOURCE
33 #include "igcodeckrn_g11.h"
34 #endif
35 #include "codeckrnheader.h"
36 #include "mhw_vdbox_hcp_g11_X.h"
37 #include "mhw_vdbox_g11_X.h"
38 #include "mos_util_user_interface.h"
39
40 //! \cond SKIP_DOXYGEN
//! CRECOST: multiplies lambda by the (lcu, mode, slice) entry of m_modeBits and the
//! (mode, slice) entry of m_modeBitsScale, converts the product to uint32_t and passes
//! it to Map44LutValue together with 0x8F as the second argument.
#define CRECOST(lambda, mode, lcu, slice)                                   \
    (Map44LutValue(                                                         \
        (uint32_t)((lambda) * (m_modeBits[(lcu)][(mode)][(slice)]) *        \
                   (m_modeBitsScale[(mode)][(slice)])),                     \
        0x8F))

//! RDEBITS62: multiplies the (lcu, mode, slice) entry of m_modeBits by the (mode, slice)
//! entry of m_modeBitsScale, converts the product to float and passes it to GetU62ModeBits.
#define RDEBITS62(mode, lcu, slice)                                         \
    (GetU62ModeBits(                                                        \
        (float)((m_modeBits[(lcu)][(mode)][(slice)]) *                      \
                (m_modeBitsScale[(mode)][(slice)]))))
43 //! \endcond
44
AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER * cmdBuffer)45 MOS_STATUS CodechalEncHevcStateG11::AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER* cmdBuffer)
46 {
47 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
48
49 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11 pipeModeSelectParams;
50 SetHcpPipeModeSelectParams(pipeModeSelectParams);
51 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
52
53 return eStatus;
54 }
55
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)56 void CodechalEncHevcStateG11::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
57 {
58 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11& pipeModeSelectParams =
59 static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11&>(vdboxPipeModeSelectParams);
60 pipeModeSelectParams = {};
61 CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
62
63 pipeModeSelectParams.pakPiplnStrmoutEnabled = m_pakPiplStrmOutEnable;
64 pipeModeSelectParams.pakFrmLvlStrmoutEnable = (m_brcEnabled && m_numPipe > 1);
65
66 if (m_numPipe > 1)
67 {
68 // Running in the multiple VDBOX mode
69 if (IsFirstPipe())
70 {
71 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
72 }
73 else if (IsLastPipe())
74 {
75 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
76 }
77 else
78 {
79 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
80 }
81 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
82 }
83 else
84 {
85 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
86 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
87 }
88 }
89
SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE & picStateParams)90 void CodechalEncHevcStateG11::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE& picStateParams)
91 {
92 CODECHAL_ENCODE_FUNCTION_ENTER;
93
94 CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams);
95 picStateParams.sseEnabledInVmeEncode = m_sseEnabled;
96
97 }
98
UpdateYUY2SurfaceInfo(MOS_SURFACE & surface,bool is10Bit)99 MOS_STATUS CodechalEncHevcStateG11::UpdateYUY2SurfaceInfo(
100 MOS_SURFACE& surface,
101 bool is10Bit)
102 {
103 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
104
105 CODECHAL_ENCODE_FUNCTION_ENTER;
106
107 if (surface.Format == Format_YUY2V)
108 {
109 // surface has been updated
110 return eStatus;
111 }
112
113 if (surface.Format != Format_YUY2 &&
114 surface.Format != Format_Y210 &&
115 surface.Format != Format_Y216)
116 {
117 eStatus = MOS_STATUS_INVALID_PARAMETER;
118 return eStatus;
119 }
120
121 if (surface.dwWidth < m_oriFrameWidth / 2 || surface.dwHeight < m_oriFrameHeight * 2)
122 {
123 eStatus = MOS_STATUS_INVALID_PARAMETER;
124 return eStatus;
125 }
126
127 surface.Format = is10Bit ? Format_Y216V : Format_YUY2V;
128 surface.dwWidth = m_oriFrameWidth;
129 surface.dwHeight = m_oriFrameHeight;
130
131 surface.YPlaneOffset.iSurfaceOffset = 0;
132 surface.YPlaneOffset.iXOffset = 0;
133 surface.YPlaneOffset.iYOffset = 0;
134
135 surface.UPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
136 surface.UPlaneOffset.iXOffset = 0;
137 surface.UPlaneOffset.iYOffset = surface.dwHeight;
138
139 surface.VPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch;
140 surface.VPlaneOffset.iXOffset = 0;
141 surface.VPlaneOffset.iYOffset = surface.dwHeight;
142
143 return eStatus;
144 }
145
InitializePicture(const EncoderParams & params)146 MOS_STATUS CodechalEncHevcStateG11::InitializePicture(const EncoderParams& params)
147 {
148 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
149
150 CODECHAL_ENCODE_FUNCTION_ENTER;
151
152 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::InitializePicture(params));
153
154 if (m_resolutionChanged)
155 {
156 ResizeBufferOffset();
157 }
158
159 m_sseEnabled = false;
160 // only 420 format support SSE output
161 // see TDR in scalability case, disable SSE for now before HW confirm the capability.
162 if (m_sseSupported &&
163 m_hevcSeqParams->chroma_format_idc == HCP_CHROMA_FORMAT_YUV420 &&
164 m_numPipe == 1)
165 {
166 m_sseEnabled = true;
167 }
168 // for HEVC VME, HUC based WP is not supported.
169 m_hevcPicParams->bEnableGPUWeightedPrediction = false;
170
171 m_pakPiplStrmOutEnable = m_sseEnabled || (m_brcEnabled && m_numPipe > 1);
172
173 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams, params.dwBitstreamSize));
174 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
175 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResourcesVariableSize());
176
177 return eStatus;
178 }
179
SetPictureStructs()180 MOS_STATUS CodechalEncHevcStateG11::SetPictureStructs()
181 {
182 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
183
184 CODECHAL_ENCODE_FUNCTION_ENTER;
185
186 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs());
187
188 m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
189
190 if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
191 (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
192 {
193 uint8_t currRefIdx = m_hevcPicParams->CurrReconstructedPic.FrameIdx;
194 UpdateYUY2SurfaceInfo(m_refList[currRefIdx]->sRefBuffer, m_is10BitHevc);
195
196 if(m_pictureCodingType != I_TYPE)
197 {
198 for (uint32_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
199 {
200 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
201 {
202 continue;
203 }
204 uint8_t picIdx = m_picIdx[i].ucPicIdx;
205 CODECHAL_ENCODE_ASSERT(picIdx < 127);
206
207 UpdateYUY2SurfaceInfo((m_refList[picIdx]->sRefBuffer), m_is10BitHevc);
208 }
209 }
210 }
211
212 return eStatus;
213 }
214
SetKernelParams(EncOperation encOperation,MHW_KERNEL_PARAM * kernelParams,uint32_t idx)215 MOS_STATUS CodechalEncHevcStateG11::SetKernelParams(
216 EncOperation encOperation,
217 MHW_KERNEL_PARAM* kernelParams,
218 uint32_t idx)
219 {
220 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
221
222 kernelParams->iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads;
223 kernelParams->iIdCount = 1;
224
225 uint32_t curbeAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
226 switch (encOperation)
227 {
228 case ENC_MBENC:
229 {
230 switch (idx)
231 {
232 case MBENC_LCU32_KRNIDX:
233 kernelParams->iBTCount = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
234 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU32_BTI), (size_t)curbeAlignment);
235 kernelParams->iBlockWidth = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
236 kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G9;
237 break;
238
239 case MBENC_LCU64_KRNIDX:
240 kernelParams->iBTCount = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
241 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU64_BTI), (size_t)curbeAlignment);
242 kernelParams->iBlockWidth = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
243 kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G10;
244 break;
245
246 default:
247 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
248 return MOS_STATUS_INVALID_PARAMETER;
249 }
250 }
251 break;
252
253 case ENC_BRC:
254 {
255 switch (idx)
256 {
257 case CODECHAL_HEVC_BRC_INIT:
258 case CODECHAL_HEVC_BRC_RESET:
259 kernelParams->iBTCount = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
260 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRC_INITRESET_CURBE), (size_t)curbeAlignment);
261 kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
262 kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
263 break;
264
265 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
266 kernelParams->iBTCount = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
267 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
268 kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
269 kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE;
270 break;
271
272 case CODECHAL_HEVC_BRC_LCU_UPDATE:
273 kernelParams->iBTCount = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
274 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment);
275 kernelParams->iBlockWidth = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
276 kernelParams->iBlockHeight = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE;
277 break;
278
279 default:
280 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
281 return MOS_STATUS_INVALID_PARAMETER;
282 }
283 }
284 break;
285
286 default:
287 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
288 return MOS_STATUS_INVALID_PARAMETER;
289 }
290
291 return eStatus;
292 }
293
SetBindingTable(EncOperation encOperation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,uint32_t idx)294 MOS_STATUS CodechalEncHevcStateG11::SetBindingTable(
295 EncOperation encOperation,
296 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable,
297 uint32_t idx)
298 {
299 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
300
301 CODECHAL_ENCODE_CHK_NULL_RETURN(hevcEncBindingTable);
302
303 MOS_ZeroMemory(hevcEncBindingTable, sizeof(*hevcEncBindingTable));
304
305 switch (encOperation)
306 {
307 case ENC_MBENC:
308 {
309 switch (idx)
310 {
311 case MBENC_LCU32_KRNIDX:
312 case MBENC_LCU64_KRNIDX:
313 hevcEncBindingTable->dwNumBindingTableEntries = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN;
314 hevcEncBindingTable->dwBindingTableStartOffset = MBENC_B_FRAME_BEGIN;
315 break;
316
317 default:
318 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
319 return MOS_STATUS_INVALID_PARAMETER;
320 }
321 }
322 break;
323
324 case ENC_BRC:
325 {
326 switch (idx)
327 {
328 case CODECHAL_HEVC_BRC_INIT:
329 hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
330 hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
331 break;
332
333 case CODECHAL_HEVC_BRC_RESET:
334 hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
335 hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN;
336 break;
337
338 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
339 hevcEncBindingTable->dwNumBindingTableEntries = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
340 hevcEncBindingTable->dwBindingTableStartOffset = BRC_UPDATE_BEGIN;
341 break;
342
343 case CODECHAL_HEVC_BRC_LCU_UPDATE:
344 hevcEncBindingTable->dwNumBindingTableEntries = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
345 hevcEncBindingTable->dwBindingTableStartOffset = BRC_LCU_UPDATE_BEGIN;
346 break;
347
348 default:
349 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
350 return MOS_STATUS_INVALID_PARAMETER;
351 }
352 }
353 break;
354
355 default:
356 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
357 return MOS_STATUS_INVALID_PARAMETER;
358 }
359
360 for (uint32_t i = 0; i < hevcEncBindingTable->dwNumBindingTableEntries; i++)
361 {
362 hevcEncBindingTable->dwBindingTableEntries[i] = i;
363 }
364
365 return eStatus;
366 }
367
AllocateEncResources()368 MOS_STATUS CodechalEncHevcStateG11::AllocateEncResources()
369 {
370 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
371
372 CODECHAL_ENCODE_FUNCTION_ENTER;
373
374 // Surfaces used by I & B Kernels
375 uint32_t width = 0, height = 0;
376 uint32_t size = 0;
377
378 // Intermediate CU Record surface
379 if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu32.OsResource))
380 {
381 width = m_widthAlignedLcu32;
382 height = m_heightAlignedLcu32 >> 1;
383
384 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
385 &m_intermediateCuRecordSurfaceLcu32,
386 width,
387 height,
388 "Intermediate CU record surface",
389 MOS_TILE_Y));
390 }
391
392 // Scratch surface for I-kernel
393 if (Mos_ResourceIsNull(&m_scratchSurface.OsResource))
394 {
395 width = m_widthAlignedLcu32 >> 3;
396 height = m_heightAlignedLcu32 >> 5;
397
398 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
399 &m_scratchSurface,
400 width,
401 height,
402 "Scratch surface for I and B Kernels"));
403 }
404
405 // LCU Level Input Data
406 for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
407 {
408 if (Mos_ResourceIsNull(&m_lcuLevelInputDataSurface[i].OsResource))
409 {
410 width = 16 * ((m_widthAlignedMaxLcu >> 6) << 1);
411 height = ((m_heightAlignedMaxLcu >> 6) << 1);
412
413 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
414 &m_lcuLevelInputDataSurface[i],
415 width,
416 height,
417 "Lcu Level Data Input surface"));
418 }
419 }
420
421 m_brcInputForEncKernelBuffer = nullptr;
422
423 //Current Picture Y with Reconstructed boundary pixels
424 if (Mos_ResourceIsNull(&m_currPicWithReconBoundaryPix.OsResource))
425 {
426 width = m_widthAlignedLcu32;
427 height = m_heightAlignedLcu32;
428
429 if (m_isMaxLcu64)
430 {
431 width = m_widthAlignedMaxLcu;
432 height = m_heightAlignedMaxLcu;
433 }
434
435 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
436 &m_currPicWithReconBoundaryPix,
437 width,
438 height,
439 "Current Picture Y with Reconstructed Boundary Pixels surface"));
440 }
441
442 //Debug surface
443 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
444 {
445 if (Mos_ResourceIsNull(&m_debugSurface[i].sResource))
446 {
447 size = m_debugSurfaceSize;
448
449 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
450 &m_debugSurface[i],
451 size,
452 "Kernel debug surface"));
453 }
454 }
455
456 // Surfaces used by B Kernels
457 // Enc constant table for B LCU32
458 if (Mos_ResourceIsNull(&m_encConstantTableForB.sResource))
459 {
460 size = m_encConstantDataLutSize;
461
462 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
463 &m_encConstantTableForB,
464 size,
465 "Enc Constant Table surface For LCU32/LCU64"));
466 }
467
468 if (m_hmeSupported)
469 {
470 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
471
472 // BRC Distortion surface
473 if (Mos_ResourceIsNull(&m_brcBuffers.sMeBrcDistortionBuffer.OsResource))
474 {
475 width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
476 height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
477
478 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
479 &m_brcBuffers.sMeBrcDistortionBuffer,
480 width,
481 height,
482 "Brc Distortion surface Buffer"));
483 }
484 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
485 }
486
487 for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
488 {
489 if (Mos_ResourceIsNull(&m_encBCombinedBuffer1[i].sResource))
490 {
491 size = sizeof(MBENC_COMBINED_BUFFER1);
492
493 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
494 &m_encBCombinedBuffer1[i],
495 size,
496 "Enc B combined buffer1"));
497 // no intialization needed here
498 // driver will write the curbe into this surface in the SetCurbeMbEncKernel
499
500 }
501 }
502
503 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
504 {
505 if (Mos_ResourceIsNull(&m_encBCombinedBuffer2[i].sResource))
506 {
507 uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
508 MBENC_COMBINED_BUFFER2 fixedBuf;
509
510 m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
511 m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
512 m_encFrameLevelDistortionBufferSize = ENC_FRAME_LEVEL_DISTORTION_BUFFER;
513 m_encCtuLevelDistortionBufferSize = MOS_ALIGN_CEIL(16 * numLcu64, CODECHAL_CACHELINE_SIZE);
514 size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize +
515 m_encFrameLevelDistortionBufferSize + m_encCtuLevelDistortionBufferSize;
516 m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
517 m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
518
519 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
520 &m_encBCombinedBuffer2[i],
521 size,
522 "Enc B combined buffer2"));
523 // no intialization needed here
524 // DS kernel will initialize the multi-thread task buffer to 0 (part of m_encBCombinedBuffer2)
525 }
526 }
527
528 return eStatus;
529 }
530
FreeEncResources()531 MOS_STATUS CodechalEncHevcStateG11::FreeEncResources()
532 {
533 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
534
535 CODECHAL_ENCODE_FUNCTION_ENTER;
536
537 MOS_DeleteArray(m_mbEncKernelStates);
538 m_mbEncKernelStates = nullptr;
539 MOS_FreeMemory(m_mbEncKernelBindingTable);
540 m_mbEncKernelBindingTable = nullptr;
541
542 MOS_DeleteArray(m_brcKernelStates);
543 m_brcKernelStates = nullptr;
544 MOS_FreeMemory(m_brcKernelBindingTable);
545 m_brcKernelBindingTable = nullptr;
546
547 HmeParams hmeParams;
548 MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
549 hmeParams.presMvAndDistortionSumSurface = &m_mvAndDistortionSumSurface.sResource;
550 CODECHAL_ENCODE_CHK_STATUS_RETURN(DestroyMEResources(&hmeParams));
551
552 // Surfaces used by I kernel
553 // Release Intermediate CU Record surface
554 m_osInterface->pfnFreeResource(
555 m_osInterface,
556 &m_intermediateCuRecordSurfaceLcu32.OsResource);
557
558 // Release Scratch surface for I-kernel
559 m_osInterface->pfnFreeResource(
560 m_osInterface,
561 &m_scratchSurface.OsResource);
562
563 // Release LCU Level Input Data
564 for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++)
565 {
566 m_osInterface->pfnFreeResource(
567 m_osInterface,
568 &m_lcuLevelInputDataSurface[i].OsResource);
569 }
570
571 // Release Current Picture Y with Reconstructed boundary pixels surface
572 m_osInterface->pfnFreeResource(
573 m_osInterface,
574 &m_currPicWithReconBoundaryPix.OsResource);
575
576 // Release Debug surface
577 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++)
578 {
579 m_osInterface->pfnFreeResource(
580 m_osInterface,
581 &m_debugSurface[i].sResource);
582 }
583
584 // Surfaces used by B Kernels
585 // Enc constant table for B LCU32
586 m_osInterface->pfnFreeResource(
587 m_osInterface,
588 &m_encConstantTableForB.sResource);
589
590 CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources());
591
592 for(uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++)
593 {
594 m_osInterface->pfnFreeResource(
595 m_osInterface,
596 &m_encBCombinedBuffer1[i].sResource);
597 }
598
599 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++)
600 {
601 m_osInterface->pfnFreeResource(
602 m_osInterface,
603 &m_encBCombinedBuffer2[i].sResource);
604 }
605
606 if (m_swScoreboard)
607 {
608 MOS_FreeMemory(m_swScoreboard);
609 m_swScoreboard = nullptr;
610 }
611
612 return eStatus;
613 }
614
AllocateMeResources()615 MOS_STATUS CodechalEncHevcStateG11::AllocateMeResources()
616 {
617 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
618
619 CODECHAL_ENCODE_FUNCTION_ENTER;
620
621 // Mv and Distortion Summation surface
622 if (Mos_ResourceIsNull(&m_mvAndDistortionSumSurface.sResource))
623 {
624 uint32_t size = m_mvdistSummationSurfSize;
625
626 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
627 &m_mvAndDistortionSumSurface,
628 size,
629 "Mv and Distortion Summation surface"));
630
631 // Initialize the surface to zero for now till HME is updated to output the data into this surface
632 MOS_LOCK_PARAMS lockFlags;
633 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
634 lockFlags.WriteOnly = 1;
635 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
636 m_osInterface,
637 &m_mvAndDistortionSumSurface.sResource,
638 &lockFlags);
639 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
640
641 MOS_ZeroMemory(data, size);
642
643 m_osInterface->pfnUnlockResource(
644 m_osInterface,
645 &m_mvAndDistortionSumSurface.sResource);
646 }
647
648 return eStatus;
649 }
650
FreeMeResources()651 MOS_STATUS CodechalEncHevcStateG11::FreeMeResources()
652 {
653 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
654
655 CODECHAL_ENCODE_FUNCTION_ENTER;
656
657 m_osInterface->pfnFreeResource(
658 m_osInterface,
659 &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);
660
661 return eStatus;
662 }
663
AllocatePakResources()664 MOS_STATUS CodechalEncHevcStateG11::AllocatePakResources()
665 {
666 CODECHAL_ENCODE_FUNCTION_ENTER;
667
668 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
669
670 uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
671 uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
672 m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
673
674 const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max width
675 const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max height
676
677 MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
678 MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
679 hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
680 hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
681 // We should move the buffer allocation to picture level if the size is dependent on LCU size
682 hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
683 hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
684 hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
685
686 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
687 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
688 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
689 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
690 allocParamsForBufferLinear.Format = Format_Buffer;
691
692 // Deblocking Filter Row Store Scratch data surface
693 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
694 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
695 &hcpBufSizeParam);
696
697 if (eStatus != MOS_STATUS_SUCCESS)
698 {
699 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
700 return eStatus;
701 }
702
703 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
704 allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
705
706 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
707 m_osInterface,
708 &allocParamsForBufferLinear,
709 &m_resDeblockingFilterRowStoreScratchBuffer);
710
711 if (eStatus != MOS_STATUS_SUCCESS)
712 {
713 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
714 return eStatus;
715 }
716
717 // Deblocking Filter Tile Row Store Scratch data surface
718 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
719 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
720 &hcpBufSizeParam);
721
722 if (eStatus != MOS_STATUS_SUCCESS)
723 {
724 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
725 return eStatus;
726 }
727
728 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
729 allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
730
731 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
732 m_osInterface,
733 &allocParamsForBufferLinear,
734 &m_resDeblockingFilterTileRowStoreScratchBuffer);
735
736 if (eStatus != MOS_STATUS_SUCCESS)
737 {
738 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
739 return eStatus;
740 }
741
742 // Deblocking Filter Column Row Store Scratch data surface
743 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
744 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
745 &hcpBufSizeParam);
746
747 if (eStatus != MOS_STATUS_SUCCESS)
748 {
749 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
750 return eStatus;
751 }
752
753 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
754 allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
755
756 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
757 m_osInterface,
758 &allocParamsForBufferLinear,
759 &m_resDeblockingFilterColumnRowStoreScratchBuffer);
760
761 if (eStatus != MOS_STATUS_SUCCESS)
762 {
763 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
764 return eStatus;
765 }
766
767 // Metadata Line buffer
768 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
769 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
770 &hcpBufSizeParam);
771
772 if (eStatus != MOS_STATUS_SUCCESS)
773 {
774 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
775 return eStatus;
776 }
777
778 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
779 allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
780
781 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
782 m_osInterface,
783 &allocParamsForBufferLinear,
784 &m_resMetadataLineBuffer);
785
786 if (eStatus != MOS_STATUS_SUCCESS)
787 {
788 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
789 return eStatus;
790 }
791
792 // Metadata Tile Line buffer
793 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
794 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
795 &hcpBufSizeParam);
796
797 if (eStatus != MOS_STATUS_SUCCESS)
798 {
799 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
800 return eStatus;
801 }
802
803 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
804 allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
805
806 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
807 m_osInterface,
808 &allocParamsForBufferLinear,
809 &m_resMetadataTileLineBuffer);
810
811 if (eStatus != MOS_STATUS_SUCCESS)
812 {
813 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
814 return eStatus;
815 }
816
817 // Metadata Tile Column buffer
818 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
819 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
820 &hcpBufSizeParam);
821
822 if (eStatus != MOS_STATUS_SUCCESS)
823 {
824 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
825 return eStatus;
826 }
827
828 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
829 allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
830
831 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
832 m_osInterface,
833 &allocParamsForBufferLinear,
834 &m_resMetadataTileColumnBuffer);
835
836 if (eStatus != MOS_STATUS_SUCCESS)
837 {
838 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
839 return eStatus;
840 }
841
842 // SAO Line buffer
843 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
844 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
845 &hcpBufSizeParam);
846
847 if (eStatus != MOS_STATUS_SUCCESS)
848 {
849 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
850 return eStatus;
851 }
852
853 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
854 allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
855
856 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
857 m_osInterface,
858 &allocParamsForBufferLinear,
859 &m_resSaoLineBuffer);
860
861 if (eStatus != MOS_STATUS_SUCCESS)
862 {
863 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
864 return eStatus;
865 }
866
867 // SAO Tile Line buffer
868 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
869 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
870 &hcpBufSizeParam);
871
872 if (eStatus != MOS_STATUS_SUCCESS)
873 {
874 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
875 return eStatus;
876 }
877
878 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
879 allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
880
881 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
882 m_osInterface,
883 &allocParamsForBufferLinear,
884 &m_resSaoTileLineBuffer);
885
886 if (eStatus != MOS_STATUS_SUCCESS)
887 {
888 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
889 return eStatus;
890 }
891
892 // SAO Tile Column buffer
893 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
894 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
895 &hcpBufSizeParam);
896
897 if (eStatus != MOS_STATUS_SUCCESS)
898 {
899 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
900 return eStatus;
901 }
902
903 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
904 allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
905
906 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
907 m_osInterface,
908 &allocParamsForBufferLinear,
909 &m_resSaoTileColumnBuffer);
910
911 if (eStatus != MOS_STATUS_SUCCESS)
912 {
913 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
914 return eStatus;
915 }
916
917 // Lcu ILDB StreamOut buffer
918 // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
919 allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
920 allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
921
922 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
923 m_osInterface,
924 &allocParamsForBufferLinear,
925 &m_resLcuIldbStreamOutBuffer);
926
927 if (eStatus != MOS_STATUS_SUCCESS)
928 {
929 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
930 return eStatus;
931 }
932
933 // Lcu Base Address buffer
934 // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
935 // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
936 // Align to page for HUC requirement
937 uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
938 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
939 allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
940
941 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
942 m_osInterface,
943 &allocParamsForBufferLinear,
944 &m_resLcuBaseAddressBuffer);
945
946 if (eStatus != MOS_STATUS_SUCCESS)
947 {
948 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
949 return eStatus;
950 }
951 // SAO StreamOut buffer
952 // size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * 16
953 uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
954 //extra added size to cover tile enabled case, per tile width aligned to 4. 20: max tile column No.
955 size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
956 allocParamsForBufferLinear.dwBytes = size;
957 allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
958
959 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
960 m_osInterface,
961 &allocParamsForBufferLinear,
962 &m_resSaoStreamOutBuffer);
963
964 if (eStatus != MOS_STATUS_SUCCESS)
965 {
966 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
967 return eStatus;
968 }
969
970 uint32_t maxTileNumber = (MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE) *
971 (MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE);
972
973 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
974 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
975 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
976 allocParamsForBufferLinear.Format = Format_Buffer;
977
978 // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP pipe buffer address command
979 allocParamsForBufferLinear.dwBytes = m_sizeOfHcpPakFrameStats * maxTileNumber; //Each tile has 8 cache size bytes of data
980 allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
981
982 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
983 m_osInterface,
984 &allocParamsForBufferLinear,
985 &m_resFrameStatStreamOutBuffer));
986
987 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
988 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
989 uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
990 uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
991 size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
992 allocParamsForBufferLinear.dwBytes = size;
993 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
994
995 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
996 m_osInterface,
997 &allocParamsForBufferLinear,
998 &m_resPakcuLevelStreamoutData.sResource));
999 m_resPakcuLevelStreamoutData.dwSize = size;
1000 CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
1001
1002 // Allocate SSE Source Pixel Row Store Buffer. Implementation for each tile column is shown as below:
1003 // tileWidthInLCU = ((tileWidthInLCU+3) * BYTES_PER_CACHE_LINE)*(4+4) ; tileWidthInLCU <<= 1; // double the size as RTL treats it as 10 bit data
1004 // Here, we consider each LCU column is one tile column.
1005 m_sizeOfSseSrcPixelRowStoreBufferPerLcu = (CODECHAL_CACHELINE_SIZE * (4 + 4)) << 1; //size per LCU plus 10-bit
1006 size = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (picWidthInMinLCU + 3); // already aligned to cacheline size
1007 allocParamsForBufferLinear.dwBytes = size;
1008 allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
1009
1010 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1011 m_osInterface,
1012 &allocParamsForBufferLinear,
1013 &m_resSseSrcPixelRowStoreBuffer));
1014
1015 //HCP scalability Sync buffer
1016 size = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
1017 allocParamsForBufferLinear.dwBytes = size;
1018 allocParamsForBufferLinear.pBufName = "GEN11 Hcp scalability Sync buffer ";
1019
1020 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1021 m_osInterface,
1022 &allocParamsForBufferLinear,
1023 &m_resHcpScalabilitySyncBuffer.sResource));
1024 m_resHcpScalabilitySyncBuffer.dwSize = size;
1025
1026 // create the tile coding state parameters
1027 m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)MOS_AllocAndZeroMemory
1028 (sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11)* maxTileNumber);
1029
1030 if(m_enableHWSemaphore)
1031 {
1032 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1033 allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
1034
1035 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1036 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1037 lockFlagsWriteOnly.WriteOnly = 1;
1038
1039 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1040 {
1041 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1042 m_osInterface,
1043 &allocParamsForBufferLinear,
1044 &m_resBrcSemaphoreMem[i].sResource);
1045 m_resBrcSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
1046 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create BRC HW Semaphore Memory.");
1047
1048 uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1049 m_osInterface,
1050 &m_resBrcSemaphoreMem[i].sResource,
1051 &lockFlagsWriteOnly);
1052
1053 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1054
1055 *data = 1;
1056
1057 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1058 m_osInterface,
1059 &m_resBrcSemaphoreMem[i].sResource));
1060 }
1061
1062 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1063 m_osInterface,
1064 &allocParamsForBufferLinear,
1065 &m_resPipeStartSemaMem);
1066 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe start sync HW semaphore.");
1067
1068 uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(
1069 m_osInterface,
1070 &m_resPipeStartSemaMem,
1071 &lockFlagsWriteOnly);
1072
1073 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1074 *data = 0;
1075 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1076 m_osInterface,
1077 &m_resPipeStartSemaMem));
1078
1079 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
1080 m_osInterface,
1081 &allocParamsForBufferLinear,
1082 &m_resPipeCompleteSemaMem);
1083 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe completion sync HW semaphore.");
1084
1085 data = (uint32_t *)m_osInterface->pfnLockResource(
1086 m_osInterface,
1087 &m_resPipeCompleteSemaMem,
1088 &lockFlagsWriteOnly);
1089
1090 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1091 *data = 0;
1092 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1093 m_osInterface,
1094 &m_resPipeCompleteSemaMem));
1095
1096 }
1097
1098 if (m_hucPakStitchEnabled)
1099 {
1100 if (Mos_ResourceIsNull(&m_resHucStatus2Buffer))
1101 {
1102 // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1103 allocParamsForBufferLinear.dwBytes = sizeof(uint64_t);
1104 allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer";
1105
1106 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1107 m_osInterface->pfnAllocateResource(
1108 m_osInterface,
1109 &allocParamsForBufferLinear,
1110 &m_resHucStatus2Buffer),
1111 "%s: Failed to allocate HUC STATUS 2 Buffer\n", __FUNCTION__);
1112 }
1113
1114 uint8_t* data;
1115
1116 // Pak stitch DMEM
1117 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG11), CODECHAL_CACHELINE_SIZE);
1118 allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
1119 auto numOfPasses = CODECHAL_DP_MAX_NUM_BRC_PASSES;
1120 for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
1121 {
1122 for (auto i = 0; i < numOfPasses; i++)
1123 {
1124 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1125 m_osInterface->pfnAllocateResource(
1126 m_osInterface,
1127 &allocParamsForBufferLinear,
1128 &m_resHucPakStitchDmemBuffer[j][i]),
1129 "Failed to allocate PAK Stitch Dmem Buffer.");
1130
1131 }
1132 }
1133 // BRC Data Buffer
1134 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1135 allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
1136
1137 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1138 m_osInterface->pfnAllocateResource(
1139 m_osInterface,
1140 &allocParamsForBufferLinear,
1141 &m_resBrcDataBuffer),
1142 "Failed to allocate BRC Data Buffer Buffer.");
1143
1144 MOS_LOCK_PARAMS lockFlags;
1145 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1146 lockFlags.WriteOnly = 1;
1147
1148 data = (uint8_t*)m_osInterface->pfnLockResource(
1149 m_osInterface,
1150 &m_resBrcDataBuffer,
1151 &lockFlags);
1152
1153 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1154
1155 MOS_ZeroMemory(
1156 data,
1157 allocParamsForBufferLinear.dwBytes);
1158
1159 m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
1160
1161 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1162 {
1163 for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1164 {
1165 // HuC stitching Data buffer
1166 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
1167 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
1168 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1169 m_osInterface->pfnAllocateResource(
1170 m_osInterface,
1171 &allocParamsForBufferLinear,
1172 &m_resHucStitchDataBuffer[i][j]));
1173
1174 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1175 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1176 lockFlagsWriteOnly.WriteOnly = 1;
1177
1178 uint8_t* pData = (uint8_t*)m_osInterface->pfnLockResource(
1179 m_osInterface,
1180 &m_resHucStitchDataBuffer[i][j],
1181 &lockFlagsWriteOnly);
1182 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
1183 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
1184 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1185 }
1186 }
1187
1188 //Second level BB for huc stitching cmd
1189 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
1190 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
1191 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1192 m_osInterface,
1193 &m_HucStitchCmdBatchBuffer,
1194 nullptr,
1195 m_hwInterface->m_HucStitchCmdBatchBufferSize));
1196 }
1197
1198 if (m_numDelay)
1199 {
1200 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1201 allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1202
1203 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1204 m_osInterface,
1205 &allocParamsForBufferLinear,
1206 &m_resDelayMinus), "Failed to allocate delay minus memory.");
1207
1208 uint8_t* data;
1209 MOS_LOCK_PARAMS lockFlags;
1210 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1211 lockFlags.WriteOnly = 1;
1212 data = (uint8_t*)m_osInterface->pfnLockResource(
1213 m_osInterface,
1214 &m_resDelayMinus,
1215 &lockFlags);
1216
1217 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1218
1219 MOS_ZeroMemory(data, sizeof(uint32_t));
1220
1221 m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1222 }
1223
1224
1225 return eStatus;
1226 }
1227
FreePakResources()1228 MOS_STATUS CodechalEncHevcStateG11::FreePakResources()
1229 {
1230 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1231
1232 CODECHAL_ENCODE_FUNCTION_ENTER;
1233
1234 // Release Frame Statistics Streamout Data Destination Buffer
1235 m_osInterface->pfnFreeResource(
1236 m_osInterface,
1237 &m_resFrameStatStreamOutBuffer);
1238
1239 // PAK CU Level Stream out buffer
1240 m_osInterface->pfnFreeResource(
1241 m_osInterface,
1242 &m_resPakcuLevelStreamoutData.sResource);
1243
1244 // Release SSE Source Pixel Row Store Buffer
1245 m_osInterface->pfnFreeResource(
1246 m_osInterface,
1247 &m_resSseSrcPixelRowStoreBuffer);
1248
1249 // Release Hcp scalability Sync buffer
1250 m_osInterface->pfnFreeResource(
1251 m_osInterface,
1252 &m_resHcpScalabilitySyncBuffer.sResource);
1253
1254 m_osInterface->pfnFreeResource(
1255 m_osInterface,
1256 &m_resPakcuLevelStreamoutData.sResource);
1257
1258 m_osInterface->pfnFreeResource(
1259 m_osInterface,
1260 &m_resPakSliceLevelStreamoutData.sResource);
1261
1262 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1263 {
1264 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1265 }
1266 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1267 {
1268 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1269 }
1270 m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1271
1272 MOS_FreeMemory(m_tileParams);
1273
1274 if (m_useVirtualEngine)
1275 {
1276 for(auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1277 {
1278 for(auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1279 {
1280 for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1281 {
1282 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1283 if (cmdBuffer->pCmdBase)
1284 {
1285 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1286 }
1287 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1288 }
1289 }
1290 }
1291 }
1292
1293 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1294 {
1295 auto sync = &m_refSync[i];
1296
1297 if (!Mos_ResourceIsNull(&sync->resSyncObject))
1298 {
1299 // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1300 if (sync->uiSemaphoreObjCount || sync->bInUsed)
1301 {
1302 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1303 syncParams.GpuContext = m_renderContext;
1304 syncParams.presSyncResource = &sync->resSyncObject;
1305 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1306 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1307 }
1308 }
1309 m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1310 }
1311
1312 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++)
1313 {
1314 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcSemaphoreMem[i].sResource);
1315 }
1316 m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem);
1317 m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeCompleteSemaMem);
1318
1319 if (m_hucPakStitchEnabled)
1320 {
1321 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer);
1322 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1323
1324 for (int i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1325 {
1326 for (int j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1327 {
1328 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[i][j]);
1329 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1330 }
1331 }
1332 Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
1333 }
1334
1335 if (m_numDelay)
1336 {
1337 m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
1338 }
1339
1340 return CodechalEncHevcState::FreePakResources();
1341 }
1342
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)1343 MOS_STATUS CodechalEncHevcStateG11::GetKernelHeaderAndSize(
1344 void *binary,
1345 EncOperation operation,
1346 uint32_t krnStateIdx,
1347 void *krnHeader,
1348 uint32_t *krnSize)
1349 {
1350 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1351
1352 CODECHAL_ENCODE_FUNCTION_ENTER;
1353
1354 CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
1355 CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
1356 CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
1357
1358 PCODECHAL_HEVC_KERNEL_HEADER kernelHeaderTable = (PCODECHAL_HEVC_KERNEL_HEADER)binary;
1359
1360 PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
1361 switch (operation)
1362 {
1363 case ENC_MBENC:
1364 {
1365 switch (krnStateIdx)
1366 {
1367 case MBENC_LCU32_KRNIDX:
1368 currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU32;
1369 break;
1370
1371 case MBENC_LCU64_KRNIDX:
1372 currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU64;
1373 break;
1374
1375 default:
1376 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested");
1377 return MOS_STATUS_INVALID_PARAMETER;
1378 }
1379 }
1380 break;
1381
1382 case ENC_BRC:
1383 {
1384 switch (krnStateIdx)
1385 {
1386 case CODECHAL_HEVC_BRC_INIT:
1387 currKrnHeader = &kernelHeaderTable->HEVC_brc_init;
1388 break;
1389
1390 case CODECHAL_HEVC_BRC_RESET:
1391 currKrnHeader = &kernelHeaderTable->HEVC_brc_reset;
1392 break;
1393
1394 case CODECHAL_HEVC_BRC_FRAME_UPDATE:
1395 currKrnHeader = &kernelHeaderTable->HEVC_brc_update;
1396 break;
1397
1398 case CODECHAL_HEVC_BRC_LCU_UPDATE:
1399 currKrnHeader = &kernelHeaderTable->HEVC_brc_lcuqp;
1400 break;
1401
1402 default:
1403 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
1404 return MOS_STATUS_INVALID_PARAMETER;
1405 }
1406 }
1407 break;
1408
1409 default:
1410 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
1411 return MOS_STATUS_INVALID_PARAMETER;
1412 }
1413
1414 *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
1415
1416 PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1);
1417 PCODECHAL_KERNEL_HEADER invalidEntry = &(kernelHeaderTable->HEVC_brc_lcuqp) + 1;
1418 uint32_t nextKrnOffset = *krnSize;
1419 if (nextKrnHeader < invalidEntry)
1420 {
1421 nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
1422 }
1423 *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1424
1425 return eStatus;
1426 }
1427
InitKernelStateMbEnc()1428 MOS_STATUS CodechalEncHevcStateG11::InitKernelStateMbEnc()
1429 {
1430 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1431
1432 CODECHAL_ENCODE_FUNCTION_ENTER;
1433
1434 PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1435 m_numMbEncEncKrnStates = MBENC_NUM_KRN;
1436
1437 m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
1438 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
1439
1440 m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1441 sizeof(GenericBindingTable) *
1442 m_numMbEncEncKrnStates);
1443 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
1444
1445 PMHW_KERNEL_STATE kernelStatePtr = m_mbEncKernelStates;
1446
1447 for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++)
1448 {
1449 auto kernelSize = m_combinedKernelSize;
1450 CODECHAL_KERNEL_HEADER currKrnHeader;
1451 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1452 m_kernelBinary,
1453 ENC_MBENC,
1454 krnStateIdx,
1455 &currKrnHeader,
1456 (uint32_t*)&kernelSize));
1457
1458 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1459 ENC_MBENC,
1460 &kernelStatePtr->KernelParams,
1461 krnStateIdx));
1462
1463 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1464 ENC_MBENC,
1465 &m_mbEncKernelBindingTable[krnStateIdx],
1466 krnStateIdx));
1467
1468 kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1469 kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1470 kernelStatePtr->KernelParams.iSize = kernelSize;
1471
1472 CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1473 stateHeapInterface,
1474 kernelStatePtr->KernelParams.iBTCount,
1475 &kernelStatePtr->dwSshSize,
1476 &kernelStatePtr->dwBindingTableSize));
1477
1478 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1479
1480 kernelStatePtr++;
1481 }
1482
1483 return eStatus;
1484 }
1485
InitKernelStateBrc()1486 MOS_STATUS CodechalEncHevcStateG11::InitKernelStateBrc()
1487 {
1488 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1489
1490 CODECHAL_ENCODE_FUNCTION_ENTER;
1491
1492 PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface;
1493 m_numBrcKrnStates = CODECHAL_HEVC_BRC_NUM;
1494
1495 m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
1496 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
1497
1498 m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1499 sizeof(GenericBindingTable) *
1500 m_numBrcKrnStates);
1501
1502 PMHW_KERNEL_STATE kernelStatePtr = m_brcKernelStates;
1503
1504 kernelStatePtr++; // Skipping BRC_COARSE_INTRA as it not in Gen11
1505
1506 // KrnStateIdx initialization starts at 1 as Gen11 does not support BRC_COARSE_INTRA kernel in BRC. It is part of the Combined Common Kernel
1507 for (uint32_t krnStateIdx = 1; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
1508 {
1509 auto kernelSize = m_combinedKernelSize;
1510 CODECHAL_KERNEL_HEADER currKrnHeader;
1511
1512 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
1513 m_kernelBinary,
1514 ENC_BRC,
1515 krnStateIdx,
1516 &currKrnHeader,
1517 (uint32_t*)&kernelSize));
1518
1519 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
1520 ENC_BRC,
1521 &kernelStatePtr->KernelParams,
1522 krnStateIdx));
1523
1524 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
1525 ENC_BRC,
1526 &m_brcKernelBindingTable[krnStateIdx],
1527 krnStateIdx));
1528
1529 kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1530 kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1531 kernelStatePtr->KernelParams.iSize = kernelSize;
1532
1533 CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1534 stateHeapInterface,
1535 kernelStatePtr->KernelParams.iBTCount,
1536 &kernelStatePtr->dwSshSize,
1537 &kernelStatePtr->dwBindingTableSize));
1538
1539 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1540
1541 kernelStatePtr++;
1542 }
1543
1544 return eStatus;
1545 }
1546
GetMaxBtCount()1547 uint32_t CodechalEncHevcStateG11::GetMaxBtCount()
1548 {
1549
1550 uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
1551
1552 // BRC Init kernel
1553 uint32_t btCountPhase1 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount, btIdxAlignment);
1554
1555 // SwScoreboard kernel
1556 uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_swScoreboardState->GetBTCount(), btIdxAlignment);
1557
1558 // Csc+Ds+Conversion kernel
1559 btCountPhase2 += MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment);
1560
1561 // Intra Distortion kernel
1562 if (m_intraDistKernel)
1563 {
1564 btCountPhase2 += MOS_ALIGN_CEIL(m_intraDistKernel->GetBTCount(), btIdxAlignment);
1565 }
1566 // HME 4x, 16x, 32x kernel
1567 if (m_hmeKernel)
1568 {
1569 btCountPhase2 += (MOS_ALIGN_CEIL(m_hmeKernel->GetBTCount(), btIdxAlignment)) * 3;
1570 }
1571
1572 // Weighted prediction kernel
1573 btCountPhase2 += MOS_ALIGN_CEIL(m_wpState->GetBTCount(), btIdxAlignment);
1574
1575 // LCU32 kernel, BRC Frame Update kernel, BRC LCU Update kernel
1576 uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1577 MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1578 MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU32_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1579
1580 // LCU64 kernel, BRC Frame Update kernel, BRC LCU Update kernel
1581 uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1582 MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) +
1583 MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU64_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
1584
1585 uint32_t maxBtCount = MOS_MAX(btCountPhase1, btCountPhase2);
1586 maxBtCount = MOS_MAX(maxBtCount, btCountPhase3);
1587 maxBtCount = MOS_MAX(maxBtCount, btCountPhase4);
1588
1589 return maxBtCount;
1590 }
1591
CalcScaledDimensions()1592 MOS_STATUS CodechalEncHevcStateG11::CalcScaledDimensions()
1593 {
1594 return MOS_STATUS_SUCCESS;
1595 }
1596
GetMaxRefFrames(uint8_t & maxNumRef0,uint8_t & maxNumRef1)1597 void CodechalEncHevcStateG11::GetMaxRefFrames(uint8_t& maxNumRef0, uint8_t& maxNumRef1)
1598 {
1599 maxNumRef0 = m_maxNumVmeL0Ref;
1600 maxNumRef1 = m_maxNumVmeL1Ref;
1601
1602 return;
1603 }
1604
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1605 MOS_STATUS CodechalEncHevcStateG11::GetStatusReport(
1606 EncodeStatus *encodeStatus,
1607 EncodeStatusReport *encodeStatusReport)
1608 {
1609 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1610
1611 CODECHAL_ENCODE_FUNCTION_ENTER;
1612
1613 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1614 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1615
1616 if(encodeStatusReport->UsedVdBoxNumber <= 1)
1617 {
1618 m_syntaxElementOnlyBitCnt = encodeStatus->dwMFCBitstreamSyntaxElementOnlyBitCount;
1619 return CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport);
1620 }
1621
1622 PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1623
1624 MOS_LOCK_PARAMS lockFlags;
1625 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1626 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
1627 HCPPakHWTileSizeRecord_G11* tileStatusReport = (HCPPakHWTileSizeRecord_G11*)m_osInterface->pfnLockResource(
1628 m_osInterface,
1629 &tileSizeStatusReport->sResource,
1630 &lockFlags);
1631 CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1632
1633 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1634 encodeStatusReport->PanicMode = false;
1635 encodeStatusReport->AverageQp = 0;
1636 encodeStatusReport->QpY = 0;
1637 encodeStatusReport->SuggestedQpYDelta = 0;
1638 encodeStatusReport->NumberPasses = 1;
1639 encodeStatusReport->bitstreamSize = 0;
1640 encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1641
1642 uint32_t totalCU = 0;
1643 double sumQp = 0.0;
1644 for(uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1645 {
1646 if(tileStatusReport[i].Length == 0)
1647 {
1648 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1649 return eStatus;
1650 }
1651
1652 encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1653 totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
1654 sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1655 }
1656 encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses + 1;
1657 CODECHAL_ENCODE_VERBOSEMESSAGE("Scalability Mode Exectued PAK Pass number: %d.\n", encodeStatusReport->NumberPasses);
1658
1659 if (encodeStatusReport->bitstreamSize == 0 ||
1660 encodeStatusReport->bitstreamSize >m_bitstreamUpperBound)
1661 {
1662 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
1663 encodeStatusReport->bitstreamSize = 0;
1664 CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!");
1665 return MOS_STATUS_INVALID_FILE_SIZE;
1666 }
1667
1668 if (m_sseEnabled)
1669 {
1670 CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
1671 }
1672
1673 CODECHAL_ENCODE_CHK_COND_RETURN(totalCU == 0, "ERROR - totalCU cannot be zero.");
1674 encodeStatusReport->QpY = encodeStatusReport->AverageQp =
1675 (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
1676
1677 if(m_enableTileStitchByHW)
1678 {
1679 return eStatus;
1680 }
1681
1682 uint8_t *tempBsBuffer = nullptr,*bufPtr = nullptr;
1683 tempBsBuffer = bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
1684 CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
1685
1686 CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
1687 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1688 lockFlags.ReadOnly = 1;
1689 uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
1690 m_osInterface,
1691 &currRefList.resBitstreamBuffer,
1692 &lockFlags);
1693 if (bitstream == nullptr)
1694 {
1695 MOS_SafeFreeMemory(tempBsBuffer);
1696 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
1697 }
1698
1699 for(uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1700 {
1701 uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1702 uint32_t len = tileStatusReport[i].Length;
1703
1704 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
1705 bufPtr += len;
1706 }
1707
1708 MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
1709 MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
1710 m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
1711
1712 if(tempBsBuffer)
1713 {
1714 MOS_FreeMemory(tempBsBuffer);
1715 }
1716
1717 if(m_osInterface && bitstream)
1718 {
1719 m_osInterface->pfnUnlockResource(m_osInterface, &currRefList.resBitstreamBuffer);
1720 }
1721
1722 if(m_osInterface && tileStatusReport)
1723 {
1724 // clean-up the tile status report buffer
1725 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
1726
1727 m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
1728 }
1729
1730 return eStatus;
1731 }
1732
AllocateResourcesVariableSize()1733 MOS_STATUS CodechalEncHevcStateG11::AllocateResourcesVariableSize()
1734 {
1735 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1736
1737 CODECHAL_ENCODE_FUNCTION_ENTER;
1738
1739 if (!m_hevcPicParams->tiles_enabled_flag)
1740 {
1741 return eStatus;
1742 }
1743
1744 uint32_t bufSize = 0;
1745 if (m_pakPiplStrmOutEnable)
1746 {
1747 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
1748 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1749 uint32_t tileWidthInCus = 0;
1750 uint32_t tileHeightInCus = 0;
1751 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
1752 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
1753 for(uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
1754 {
1755 for(uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
1756 {
1757 uint32_t idx = tileRow * numTileColumns + tileCol;
1758
1759 tileHeightInCus = m_tileParams[idx].TileHeightInMinCbMinus1 + 1;
1760 tileWidthInCus = m_tileParams[idx].TileWidthInMinCbMinus1 + 1;
1761 bufSize += (tileWidthInCus * tileHeightInCus * 16);
1762 bufSize = MOS_ALIGN_CEIL(bufSize, CODECHAL_CACHELINE_SIZE);
1763 }
1764 }
1765 if (Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ||
1766 (bufSize > m_resPakcuLevelStreamoutData.dwSize))
1767 {
1768 if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
1769 {
1770 m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
1771 }
1772
1773 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1774 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1775 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1776 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1777 allocParamsForBufferLinear.Format = Format_Buffer;
1778 allocParamsForBufferLinear.dwBytes = bufSize;
1779 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
1780
1781 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1782 m_osInterface,
1783 &allocParamsForBufferLinear,
1784 &m_resPakcuLevelStreamoutData.sResource));
1785 m_resPakcuLevelStreamoutData.dwSize = bufSize;
1786 CODECHAL_ENCODE_VERBOSEMESSAGE("reallocate cu steam out buffer, size=0x%x.\n", bufSize);
1787 }
1788 }
1789
1790 return eStatus;
1791 }
1792
ExecutePictureLevel()1793 MOS_STATUS CodechalEncHevcStateG11::ExecutePictureLevel()
1794 {
1795 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1796
1797 CODECHAL_ENCODE_FUNCTION_ENTER;
1798
1799 m_firstTaskInPhase = m_singleTaskPhaseSupported? IsFirstPass(): true;
1800 m_lastTaskInPhase = m_singleTaskPhaseSupported? IsLastPass(): true;
1801 PerfTagSetting perfTag;
1802 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
1803
1804 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
1805
1806 if (!m_singleTaskPhaseSupportedInPak)
1807 {
1808 // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
1809 m_firstTaskInPhase = true;
1810 m_lastTaskInPhase = true;
1811 }
1812
1813 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())
1814 {
1815 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1816 eStatus = MOS_STATUS_INVALID_PARAMETER;
1817 return eStatus;
1818 }
1819
1820 MOS_COMMAND_BUFFER cmdBuffer;
1821 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
1822
1823 if ((!m_singleTaskPhaseSupported) || m_firstTaskInPhase)
1824 {
1825 // Send command buffer header at the beginning (OS dependent)
1826 // frame tracking tag is only added in the last command buffer header
1827 bool bRequestFrameTracking = m_singleTaskPhaseSupported ?
1828 m_firstTaskInPhase :
1829 m_lastTaskInPhase;
1830
1831 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, bRequestFrameTracking));
1832 }
1833
1834 // clean-up per VDBOX semaphore memory
1835 int32_t currentPipe = GetCurrentPipe();
1836 if (currentPipe < 0)
1837 {
1838 eStatus = MOS_STATUS_INVALID_PARAMETER;
1839 return eStatus;
1840 }
1841
1842 if (m_numPipe >= 2 &&
1843 ((m_singleTaskPhaseSupported && IsFirstPass()) ||
1844 !m_singleTaskPhaseSupported))
1845 {
1846 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
1847
1848 //HW Semaphore cmd to make sure all pipes start encode at the same time
1849 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
1850 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
1851 &m_resPipeStartSemaMem,
1852 &cmdBuffer,
1853 m_numPipe));
1854
1855 // Program some placeholder cmds to resolve the hazard between BEs sync
1856 MHW_MI_STORE_DATA_PARAMS dataParams;
1857 dataParams.pOsResource = &m_resDelayMinus;
1858 dataParams.dwResourceOffset = 0;
1859 dataParams.dwValue = 0xDE1A;
1860 for (uint32_t i = 0; i < m_numDelay; i++)
1861 {
1862 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
1863 &cmdBuffer,
1864 &dataParams));
1865 }
1866 //clean HW semaphore memory
1867 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer));
1868
1869 //Start Watchdog Timer
1870 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
1871 //To help test media reset, this hw semaphore wait will never be reached.
1872 if (m_enableTestMediaReset)
1873 {
1874 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
1875 &m_resPipeStartSemaMem,
1876 &cmdBuffer,
1877 m_numPipe + 2));
1878 }
1879 }
1880
1881 if (m_brcEnabled && !IsFirstPass()) // Only the regular BRC passes have the conditional batch buffer end
1882 {
1883 // Ensure the previous PAK BRC pass is done, mainly for pipes other than pipe0.
1884 if (m_singleTaskPhaseSupported && m_numPipe >= 2 &&
1885 !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
1886 {
1887 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1888 SendHWWaitCommand(
1889 &m_resBrcSemaphoreMem[currentPipe].sResource,
1890 &cmdBuffer,
1891 1));
1892 }
1893
1894 // Insert conditional batch buffer end
1895 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
1896 MOS_ZeroMemory(
1897 &miConditionalBatchBufferEndParams,
1898 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
1899 uint32_t BaseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
1900 sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource ;
1901
1902 if (m_hucPakStitchEnabled && m_numPipe >= 2) //BRC scalability
1903 {
1904 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned
1905 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwHuCStatusRegOffset);
1906
1907 miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1908 miConditionalBatchBufferEndParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwHuCStatusMaskOffset;
1909 }
1910 else
1911 {
1912 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned
1913 CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwImageStatusCtrlOffset);
1914
1915 miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1916 miConditionalBatchBufferEndParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusMaskOffset;
1917 }
1918
1919 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
1920 &cmdBuffer,
1921 &miConditionalBatchBufferEndParams));
1922
1923 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
1924 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
1925 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
1926 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1927 if (m_hucPakStitchEnabled && m_numPipe >= 2)
1928 {
1929 // Write back the HCP image control register with HUC PAK Int Kernel output
1930 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1931 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1932 miLoadRegMemParams.presStoreBuffer = &m_resBrcDataBuffer;
1933 miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
1934 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1935 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
1936
1937 if (IsFirstPipe())
1938 {
1939 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
1940 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
1941 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
1942 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
1943 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1944 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
1945
1946 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1947 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1948 miStoreRegMemParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
1949 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1950 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
1951 }
1952 }
1953 else
1954 {
1955 // Write back the HCP image control register for RC6 may clean it out
1956 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1957 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1958 miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1959 miLoadRegMemParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
1960 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1961 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
1962
1963 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1964 miStoreRegMemParams.presStoreBuffer = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
1965 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1966 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1967 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
1968
1969 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1970 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
1971 miStoreRegMemParams.dwOffset = BaseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
1972 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1973 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
1974 }
1975 }
1976
1977 if (IsFirstPipe() && IsFirstPass() && m_osInterface->bTagResourceSync)
1978 {
1979 // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
1980 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
1981 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
1982 // as long as Dec/VP/Enc won't depend on this PAK so soon.
1983
1984 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
1985 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
1986 m_osInterface,
1987 globalGpuContextSyncTagBuffer));
1988 CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
1989
1990 MHW_MI_STORE_DATA_PARAMS params;
1991 params.pOsResource = globalGpuContextSyncTagBuffer;
1992 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
1993 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
1994 params.dwValue = (value > 0) ? (value - 1) : 0;
1995 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
1996 }
1997
1998 if (IsFirstPipe())
1999 {
2000 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2001 }
2002
2003 if (m_numPipe >= 2)
2004 {
2005 // clean up hw semaphore for BRC PAK pass sync, used only in single task phase.
2006 if (m_singleTaskPhaseSupported &&
2007 m_brcEnabled &&
2008 !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource))
2009 {
2010 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2011 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2012 storeDataParams.pOsResource = &m_resBrcSemaphoreMem[currentPipe].sResource;
2013 storeDataParams.dwResourceOffset = 0;
2014 storeDataParams.dwValue = 0;
2015
2016 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2017 &cmdBuffer,
2018 &storeDataParams));
2019 }
2020 }
2021
2022 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelectCmd(&cmdBuffer));
2023
2024 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpSurfaceStateCmds(&cmdBuffer));
2025
2026 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer));
2027
2028 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2029 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2030 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2031
2032 MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2033 SetHcpQmStateParams(fqmParams, qmParams);
2034 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2035 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2036
2037 if (m_brcEnabled)
2038 {
2039 uint32_t picStateCmdOffset;
2040 if (m_hucPakStitchEnabled && m_numPipe >= 2)
2041 {
2042 //for non fist PAK pass, always use the 2nd HCP PIC STATE cmd buffer
2043 picStateCmdOffset = IsFirstPass() ? 0 : 1;
2044 }
2045 else
2046 {
2047 picStateCmdOffset = GetCurrentPass();
2048 }
2049
2050 MHW_BATCH_BUFFER batchBuffer;
2051 MOS_ZeroMemory(&batchBuffer, sizeof(batchBuffer));
2052 batchBuffer.OsResource = m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx];
2053 batchBuffer.dwOffset = picStateCmdOffset * BRC_IMG_STATE_SIZE_PER_PASS_G11;
2054 batchBuffer.bSecondLevel = true;
2055
2056 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
2057 &cmdBuffer,
2058 &batchBuffer));
2059 }
2060 else
2061 {
2062 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPictureStateCmd(&cmdBuffer));
2063 }
2064
2065 // Send HEVC_VP9_RDOQ_STATE command
2066 if (m_hevcRdoqEnabled)
2067 {
2068 MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2069 SetHcpPicStateParams(picStateParams);
2070
2071 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2072 }
2073
2074 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2075
2076 return eStatus;
2077 }
2078
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState)2079 void CodechalEncHevcStateG11::SetHcpSliceStateCommonParams(
2080 MHW_VDBOX_HEVC_SLICE_STATE& sliceState)
2081 {
2082 CodechalEncHevcState::SetHcpSliceStateCommonParams(sliceState);
2083
2084 sliceState.RoundingIntra = m_roundingIntraInUse;
2085 sliceState.RoundingInter = m_roundingInterInUse;
2086
2087 if ((m_hevcSliceParams->slice_type == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
2088 (m_hevcSliceParams->slice_type == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag))
2089 {
2090 sliceState.bWeightedPredInUse = true;
2091 }
2092 else
2093 {
2094 sliceState.bWeightedPredInUse = false;
2095 }
2096
2097 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11 &>(sliceState).dwNumPipe = m_numPipe;
2098 }
2099
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,bool lastSliceInTile,uint32_t idx)2100 void CodechalEncHevcStateG11::SetHcpSliceStateParams(
2101 MHW_VDBOX_HEVC_SLICE_STATE& sliceState,
2102 PCODEC_ENCODER_SLCDATA slcData,
2103 uint16_t slcCount,
2104 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 tileCodingParams,
2105 bool lastSliceInTile,
2106 uint32_t idx)
2107 {
2108 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2109
2110 sliceState.pEncodeHevcSliceParams = &m_hevcSliceParams[slcCount];
2111 sliceState.dwDataBufferOffset = slcData[slcCount].CmdOffset;
2112 sliceState.dwOffset = slcData[slcCount].SliceOffset;
2113 sliceState.dwLength = slcData[slcCount].BitSize;
2114 sliceState.uiSkipEmulationCheckCount = slcData[slcCount].SkipEmulationByteCount;
2115 sliceState.dwSliceIndex = (uint32_t)slcCount;
2116 sliceState.bLastSlice = (slcCount == m_numSlices - 1);
2117 sliceState.bLastSliceInTile = lastSliceInTile ? true : false;
2118 sliceState.bLastSliceInTileColumn = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false;
2119 sliceState.bFirstPass = IsFirstPass();
2120 sliceState.bLastPass = IsLastPass();
2121 sliceState.bInsertBeforeSliceHeaders = (slcCount == 0);
2122 sliceState.bSaoLumaFlag = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_luma_flag : 0;
2123 sliceState.bSaoChromaFlag = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_chroma_flag : 0;
2124 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).pTileCodingParams = tileCodingParams + idx;
2125 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G11&>(sliceState).dwTileID = idx;
2126
2127 CalcTransformSkipParameters(sliceState.EncodeHevcTransformSkipParams);
2128 }
2129
ExecuteSliceLevel()2130 MOS_STATUS CodechalEncHevcStateG11::ExecuteSliceLevel()
2131 {
2132 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2133
2134 CODECHAL_ENCODE_FUNCTION_ENTER;
2135 CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
2136
2137 if (m_pakOnlyTest)
2138 {
2139 CODECHAL_ENCODE_CHK_STATUS_RETURN(LoadPakCommandAndCuRecordFromFile());
2140 }
2141
2142 if (!m_hevcPicParams->tiles_enabled_flag)
2143 {
2144 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::ExecuteSliceLevel());
2145 }
2146 else
2147 {
2148 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
2149 }
2150
2151 return eStatus;
2152 }
2153
EncTileLevel()2154 MOS_STATUS CodechalEncHevcStateG11::EncTileLevel()
2155 {
2156 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2157
2158 CODECHAL_ENCODE_FUNCTION_ENTER;
2159
2160 int32_t currentPipe = GetCurrentPipe();
2161 int32_t currentPass = GetCurrentPass();
2162
2163 if(currentPipe < 0 || currentPass < 0)
2164 {
2165 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
2166 return MOS_STATUS_INVALID_PARAMETER;
2167 }
2168
2169 MHW_VDBOX_HEVC_SLICE_STATE_G11 sliceState;
2170 SetHcpSliceStateCommonParams(sliceState);
2171
2172 MOS_COMMAND_BUFFER cmdBuffer;
2173 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2174
2175 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2176 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
2177
2178 for(uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
2179 {
2180 for(uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
2181 {
2182 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2183 uint32_t slcCount, idx, sliceNumInTile = 0;
2184
2185 idx = tileRow * numTileColumns + tileCol;
2186
2187 if ((m_numPipe > 1) && (tileCol != currentPipe))
2188 {
2189 continue;
2190 }
2191
2192 // HCP_TILE_CODING commmand
2193 CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG11*>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx]));
2194
2195 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
2196 {
2197 bool lastSliceInTile = false, sliceInTile = false;
2198
2199 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
2200 &m_tileParams[idx],
2201 &sliceInTile,
2202 &lastSliceInTile));
2203
2204 if(!sliceInTile)
2205 {
2206 continue;
2207 }
2208
2209 if (IsFirstPass())
2210 {
2211 uint32_t startLCU = 0;
2212 for(uint32_t ii = 0; ii < slcCount; ii++)
2213 {
2214 startLCU += m_hevcSliceParams[ii].NumLCUsInSlice;
2215 }
2216 slcData[slcCount].CmdOffset = startLCU * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t);
2217 }
2218
2219 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx);
2220
2221 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState));
2222
2223 sliceNumInTile++;
2224 } // end of slice
2225
2226 if(0 == sliceNumInTile)
2227 {
2228 // One tile must have at least one slice
2229 CODECHAL_ENCODE_ASSERT(false);
2230 eStatus = MOS_STATUS_INVALID_PARAMETER;
2231 return eStatus;
2232 }
2233 } // end of row tile
2234 } // end of column tile
2235
2236 // Insert end of sequence/stream if set
2237 if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
2238 {
2239 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2240 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2241 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2242 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2243 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2244 }
2245
2246 // Send VD_PIPELINE_FLUSH command
2247 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2248 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2249 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2250 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2251 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2252 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2253
2254 // Send MI_FLUSH command
2255 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2256 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2257 flushDwParams.bVideoPipelineCacheInvalidate = true;
2258 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2259
2260 //HW Semaphore cmd to make sure all pipes completion encode
2261 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeCompleteSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer));
2262
2263 if(IsFirstPipe())
2264 {
2265 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2266 &m_resPipeCompleteSemaMem,
2267 &cmdBuffer,
2268 m_numPipe));
2269
2270 //clean HW semaphore memory
2271 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2272 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2273 storeDataParams.pOsResource = &m_resPipeCompleteSemaMem;
2274 storeDataParams.dwValue = 0;
2275 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2276 &cmdBuffer,
2277 &storeDataParams));
2278
2279 // Use HW stitch commands only in the scalable mode
2280 if (m_numPipe > 1 && m_enableTileStitchByHW)
2281 {
2282 //call PAK Int Kernel in scalability case
2283 if (m_hucPakStitchEnabled)
2284 {
2285 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
2286 // 2nd level BB buffer for stitching cmd
2287 // current location to add cmds in 2nd level batch buffer
2288 m_HucStitchCmdBatchBuffer.iCurrent = 0;
2289 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2290 m_HucStitchCmdBatchBuffer.dwOffset = 0;
2291 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
2292 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
2293 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
2294 }
2295 }
2296
2297 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2298
2299 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2300
2301 if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
2302 {
2303 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2304
2305 // BRC PAK statistics different for each pass
2306 if (m_brcEnabled)
2307 {
2308 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
2309 }
2310 }
2311 else
2312 { //scalability mode
2313 if (m_brcEnabled)
2314 {
2315 //MMIO register is not used in scalability BRC case. all information is in TileSizeRecord stream out buffer
2316 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatisticsForScalability(&cmdBuffer));
2317 }
2318 else
2319 {
2320 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2321 }
2322 }
2323
2324 #if (_DEBUG || _RELEASE_INTERNAL)
2325 //this is to support BRC scalbility test to match with single pipe. Will be removed later after enhanced BRC Scalability is enabled.
2326 if (m_brcEnabled && m_forceSinglePakPass)
2327 {
2328 CODECHAL_ENCODE_CHK_STATUS_RETURN(ResetImgCtrlRegInPAKStatisticsBuffer(&cmdBuffer));
2329 }
2330 #endif
2331
2332 if (m_singleTaskPhaseSupported &&
2333 m_brcEnabled && m_numPipe >= 2 && !IsLastPass())
2334 {
2335 // Signal HW semaphore for the BRC dependency (i.e., next BRC pass waits for the current BRC pass)
2336 for (auto i = 0; i < m_numPipe; i++)
2337 {
2338 if (!Mos_ResourceIsNull(&m_resBrcSemaphoreMem[i].sResource))
2339 {
2340 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2341 storeDataParams.pOsResource = &m_resBrcSemaphoreMem[i].sResource;
2342 storeDataParams.dwValue = 1;
2343
2344 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2345 &cmdBuffer,
2346 &storeDataParams));
2347 }
2348 }
2349 }
2350 }
2351
2352 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2353 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2354
2355 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2356 {
2357 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2358 }
2359
2360 std::string pakPassName = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass));
2361 CODECHAL_DEBUG_TOOL(
2362 CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2363 &cmdBuffer,
2364 CODECHAL_NUM_MEDIA_STATES,
2365 pakPassName.data()));)
2366
2367 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2368
2369 if (IsFirstPipe() &&
2370 (m_pakOnlyTest == 0) && // In the PAK only test, no need to wait for ENC's completion
2371 IsFirstPass() &&
2372 !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
2373 {
2374 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2375 syncParams.GpuContext = m_videoContext;
2376 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
2377
2378 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
2379 }
2380
2381 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2382 {
2383 bool nullRendering = m_videoContextUsesNullHw;
2384 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
2385
2386 CODECHAL_DEBUG_TOOL(
2387 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
2388 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpPakOutput());
2389 if (m_mmcState)
2390 {
2391 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2392 }
2393 )
2394
2395 if ((IsLastPipe()) &&
2396 (IsLastPass()) &&
2397 m_signalEnc &&
2398 m_currRefSync &&
2399 !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2400 {
2401 // signal semaphore
2402 MOS_SYNC_PARAMS syncParams;
2403 syncParams = g_cInitSyncParams;
2404 syncParams.GpuContext = m_videoContext;
2405 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2406
2407 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2408 m_currRefSync->uiSemaphoreObjCount++;
2409 m_currRefSync->bInUsed = true;
2410 }
2411 }
2412
2413 // Reset parameters for next PAK execution
2414 if (IsLastPipe() && IsLastPass())
2415 {
2416 if (!m_singleTaskPhaseSupported)
2417 {
2418 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2419 }
2420
2421 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2422
2423 if (m_hevcSeqParams->ParallelBRC)
2424 {
2425 m_brcBuffers.uiCurrBrcPakStasIdxForWrite =
2426 (m_brcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2427 }
2428
2429 m_newPpsHeader = 0;
2430 m_newSeqHeader = 0;
2431 m_frameNum++;
2432 }
2433
2434 return eStatus;
2435 }
2436
DecideEncodingPipeNumber()2437 MOS_STATUS CodechalEncHevcStateG11::DecideEncodingPipeNumber()
2438 {
2439 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2440
2441 CODECHAL_ENCODE_FUNCTION_ENTER;
2442
2443 m_numPipe = m_numVdbox;
2444
2445 uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
2446
2447 if (numTileColumns > m_numPipe)
2448 {
2449 m_numPipe = 1;
2450 }
2451
2452 if (numTileColumns < m_numPipe)
2453 {
2454 if(numTileColumns >= 1 && numTileColumns <= 4)
2455 {
2456 m_numPipe = numTileColumns;
2457 }
2458 else
2459 {
2460 m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
2461 }
2462 }
2463
2464 m_useVirtualEngine = true; //always use virtual engine interface for single pipe and scalability mode
2465
2466 if (!m_forceScalability)
2467 {
2468 //resolution < 4K, always go with single pipe
2469 if (m_frameWidth * m_frameHeight < ENCODE_HEVC_4K_PIC_WIDTH * ENCODE_HEVC_4K_PIC_HEIGHT)
2470 {
2471 m_numPipe = 1;
2472 }
2473 }
2474
2475 m_numUsedVdbox = m_numPipe;
2476 m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
2477
2478 if (m_scalabilityState)
2479 {
2480 // Create/ re-use a GPU context with 2 pipes
2481 m_scalabilityState->ucScalablePipeNum = m_numPipe;
2482 }
2483
2484 return eStatus;
2485 }
2486
PlatformCapabilityCheck()2487 MOS_STATUS CodechalEncHevcStateG11::PlatformCapabilityCheck()
2488 {
2489 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2490
2491 CODECHAL_ENCODE_FUNCTION_ENTER;
2492
2493 CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
2494
2495 if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
2496 {
2497 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
2498 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
2499 }
2500
2501 if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_8K_PIC_WIDTH * ENCODE_HEVC_MAX_8K_PIC_HEIGHT)
2502 {
2503 eStatus = MOS_STATUS_INVALID_PARAMETER;
2504 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 8k not supported");
2505 }
2506
2507 if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
2508 (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat &&
2509 Format_YUY2 == m_reconSurface.Format)
2510 {
2511 if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
2512 m_reconSurface.dwWidth < m_oriFrameWidth / 2)
2513 {
2514 return MOS_STATUS_INVALID_PARAMETER;
2515 }
2516 }
2517
2518 // set RDOQ Intra blocks Threshold for Gen11+
2519 m_rdoqIntraTuThreshold = 0;
2520 if (m_hevcRdoqEnabled)
2521 {
2522 if (1 == m_hevcSeqParams->TargetUsage)
2523 {
2524 m_rdoqIntraTuThreshold = 0xffff;
2525 }
2526 else if (4 == m_hevcSeqParams->TargetUsage)
2527 {
2528 m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
2529 m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
2530 }
2531 }
2532
2533 return eStatus;
2534 }
2535
CheckSupportedFormat(PMOS_SURFACE surface)2536 bool CodechalEncHevcStateG11::CheckSupportedFormat(PMOS_SURFACE surface)
2537 {
2538 CODECHAL_ENCODE_FUNCTION_ENTER;
2539
2540 bool isColorFormatSupported = false;
2541
2542 if (nullptr == surface)
2543 {
2544 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
2545 return isColorFormatSupported;
2546 }
2547
2548 switch (surface->Format)
2549 {
2550 case Format_NV12:
2551 isColorFormatSupported = IS_Y_MAJOR_TILE_FORMAT(surface->TileType);
2552 break;
2553 case Format_YUY2:
2554 case Format_YUYV:
2555 case Format_A8R8G8B8:
2556 case Format_P010:
2557 case Format_Y210:
2558 break;
2559 default:
2560 CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
2561 break;
2562 }
2563
2564 return isColorFormatSupported;
2565 }
2566
GetSystemPipeNumberCommon()2567 MOS_STATUS CodechalEncHevcStateG11::GetSystemPipeNumberCommon()
2568 {
2569 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2570
2571 CODECHAL_ENCODE_FUNCTION_ENTER;
2572
2573 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
2574 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2575
2576 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
2577 statusKey = MOS_UserFeature_ReadValue_ID(
2578 nullptr,
2579 __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
2580 &userFeatureData,
2581 m_osInterface->pOsContext);
2582
2583 bool disableScalability = true; // m_hwInterface->IsDisableScalability() default false
2584 if (statusKey == MOS_STATUS_SUCCESS)
2585 {
2586 disableScalability = userFeatureData.i32Data ? true : false;
2587 }
2588
2589 MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
2590 CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
2591
2592 if (gtSystemInfo && disableScalability == false)
2593 {
2594 // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
2595 m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
2596 }
2597 else
2598 {
2599 m_numVdbox = 1;
2600 }
2601
2602 return eStatus;
2603 }
2604
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)2605 MOS_STATUS CodechalEncHevcStateG11::HucPakIntegrate(
2606 PMOS_COMMAND_BUFFER cmdBuffer)
2607 {
2608 CODECHAL_ENCODE_FUNCTION_ENTER;
2609
2610 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2611
2612 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2613
2614 CODECHAL_ENCODE_CHK_COND_RETURN(
2615 (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
2616 "ERROR - vdbox index exceed the maximum");
2617
2618 auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
2619
2620 // load kernel from WOPCM into L2 storage RAM
2621 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
2622 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
2623 imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
2624
2625 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
2626
2627 // pipe mode select
2628 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
2629 pipeModeSelectParams.Mode = m_mode;
2630 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
2631
2632 // DMEM set
2633 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
2634 if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
2635 {
2636 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
2637 }
2638 else
2639 {
2640 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateCqp(&dmemParams));
2641 }
2642 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
2643
2644 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
2645 if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)
2646 {
2647 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
2648 }
2649 else
2650 {
2651 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateCqp(&virtualAddrParams));
2652 }
2653
2654 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
2655
2656 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
2657 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2658 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2659 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
2660 storeDataParams.dwResourceOffset = 0;
2661 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
2662 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
2663
2664 // Store HUC_STATUS2 register
2665 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
2666 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
2667 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
2668 storeRegParams.dwOffset = sizeof(uint32_t);
2669 storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
2670 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
2671
2672 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
2673
2674 // wait Huc completion (use HEVC bit for now)
2675 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
2676 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
2677 vdPipeFlushParams.Flags.bFlushHEVC = 1;
2678 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
2679 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
2680
2681 // Flush the engine to ensure memory written out
2682 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2683 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2684 flushDwParams.bVideoPipelineCacheInvalidate = true;
2685 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2686
2687 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
2688
2689 uint32_t baseOffset =
2690 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
2691
2692 // Write HUC_STATUS mask
2693 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2694 storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
2695 storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
2696 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
2697 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2698 cmdBuffer,
2699 &storeDataParams));
2700
2701 // store HUC_STATUS register
2702 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
2703 storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
2704 storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
2705 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
2706 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
2707 cmdBuffer,
2708 &storeRegParams));
2709
2710 return eStatus;
2711 }
2712
Initialize(CodechalSetting * settings)2713 MOS_STATUS CodechalEncHevcStateG11::Initialize(CodechalSetting * settings)
2714 {
2715 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2716
2717 CODECHAL_ENCODE_FUNCTION_ENTER;
2718
2719 // Common initialization
2720 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
2721
2722 m_numDelay = 15; //Value suggested by HW team.
2723 m_bmeMethodTable = (uint8_t *)m_meMethod;
2724 m_b4XMeDistortionBufferSupported = true;
2725 m_brcBuffers.dwBrcConstantSurfaceWidth = HEVC_BRC_CONSTANT_SURFACE_WIDTH_G9;
2726 m_brcBuffers.dwBrcConstantSurfaceHeight = HEVC_BRC_CONSTANT_SURFACE_HEIGHT_G10;
2727 m_brcHistoryBufferSize = HEVC_BRC_HISTORY_BUFFER_SIZE_G11;
2728 m_maxNumSlicesSupported = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6;
2729 m_brcBuffers.dwBrcHcpPicStateSize = BRC_IMG_STATE_SIZE_PER_PASS_G11 * CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES;
2730
2731 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
2732 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2733 MOS_UserFeature_ReadValue_ID(
2734 nullptr,
2735 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
2736 &userFeatureData,
2737 m_osInterface->pOsContext);
2738 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
2739
2740 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2741 MOS_UserFeature_ReadValue_ID(
2742 nullptr,
2743 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
2744 &userFeatureData,
2745 m_osInterface->pOsContext);
2746 // Region number must be greater than 1
2747 m_numberConcurrentGroup = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
2748
2749 if (m_numberConcurrentGroup > 16)
2750 {
2751 // Region number cannot be larger than 16
2752 m_numberConcurrentGroup = 16;
2753 }
2754
2755 // Subthread number used in the ENC kernel
2756 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2757 MOS_UserFeature_ReadValue_ID(
2758 nullptr,
2759 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID,
2760 &userFeatureData,
2761 m_osInterface->pOsContext);
2762 m_numberEncKernelSubThread = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
2763
2764 if (m_numberEncKernelSubThread > m_hevcThreadTaskDataNum)
2765 {
2766 m_numberEncKernelSubThread = m_hevcThreadTaskDataNum; // support up to 2 sub-threads in one LCU64x64
2767 }
2768
2769 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2770 MOS_UserFeature_ReadValue_ID(
2771 nullptr,
2772 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
2773 &userFeatureData,
2774 m_osInterface->pOsContext);
2775 m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
2776
2777 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2778 MOS_UserFeature_ReadValue_ID(
2779 nullptr,
2780 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
2781 &userFeatureData,
2782 m_osInterface->pOsContext);
2783 m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
2784
2785 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2786 MOS_UserFeature_ReadValue_ID(
2787 nullptr,
2788 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_ENCODE_SSE_ENABLE_ID,
2789 &userFeatureData,
2790 m_osInterface->pOsContext);
2791 m_sseSupported = userFeatureData.i32Data ? true : false;
2792
2793 // Overriding the defaults here with 32 aligned dimensions
2794 // 2x Scaling WxH
2795 m_downscaledWidth2x =
2796 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth);
2797 m_downscaledHeight2x =
2798 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight);
2799
2800 // HME Scaling WxH
2801 m_downscaledWidth4x =
2802 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth);
2803 m_downscaledHeight4x =
2804 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight);
2805 m_downscaledWidthInMb4x =
2806 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
2807 m_downscaledHeightInMb4x =
2808 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x);
2809
2810 // SuperHME Scaling WxH
2811 m_downscaledWidth16x =
2812 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x);
2813 m_downscaledHeight16x =
2814 CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x);
2815 m_downscaledWidthInMb16x =
2816 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
2817 m_downscaledHeightInMb16x =
2818 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x);
2819
2820 // UltraHME Scaling WxH
2821 m_downscaledWidth32x =
2822 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x);
2823 m_downscaledHeight32x =
2824 CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x);
2825 m_downscaledWidthInMb32x =
2826 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
2827 m_downscaledHeightInMb32x =
2828 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x);
2829
2830 // disable MMCD if we enable Codechal dump. Because dump code changes the surface state from compressed to uncompressed,
2831 // this causes mis-match issue between dump is enabled or disabled.
2832 CODECHAL_DEBUG_TOOL(
2833 if (m_mmcState && m_debugInterface && m_debugInterface->m_dbgCfgHead){
2834 //m_mmcState->SetMmcDisabled();
2835 })
2836
2837 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
2838
2839 if (MOS_VE_SUPPORTED(m_osInterface))
2840 {
2841 m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
2842 CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
2843 //scalability initialize
2844 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
2845 }
2846
2847 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
2848 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2849 statusKey = MOS_UserFeature_ReadValue_ID(
2850 nullptr,
2851 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
2852 &userFeatureData,
2853 m_osInterface->pOsContext);
2854 m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
2855
2856 statusKey = MOS_STATUS_SUCCESS;
2857 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2858 statusKey = MOS_UserFeature_ReadValue_ID(
2859 nullptr,
2860 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
2861 &userFeatureData,
2862 m_osInterface->pOsContext);
2863 m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
2864
2865 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2866 statusKey = MOS_UserFeature_ReadValue_ID(
2867 nullptr,
2868 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_WP_SUPPORT_ID,
2869 &userFeatureData,
2870 m_osInterface->pOsContext);
2871 m_weightedPredictionSupported = userFeatureData.i32Data ? true : false;
2872
2873 #if (_DEBUG || _RELEASE_INTERNAL)
2874 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2875 statusKey = MOS_UserFeature_ReadValue_ID(
2876 nullptr,
2877 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
2878 &userFeatureData,
2879 m_osInterface->pOsContext);
2880 m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
2881
2882 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2883 statusKey = MOS_UserFeature_ReadValue_ID(
2884 nullptr,
2885 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_MEDIARESET_TEST_ID,
2886 &userFeatureData,
2887 m_osInterface->pOsContext);
2888 m_enableTestMediaReset = userFeatureData.i32Data ? true : false;
2889
2890 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2891 MOS_UserFeature_ReadValue_ID(
2892 nullptr,
2893 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_FORCE_SCALABILITY_ID,
2894 &userFeatureData,
2895 m_osInterface->pOsContext);
2896 m_forceScalability = userFeatureData.i32Data ? true : false;
2897
2898 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2899 MOS_UserFeature_ReadValue_ID(
2900 nullptr,
2901 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_INTERVAL_ID,
2902 &userFeatureData,
2903 m_osInterface->pOsContext);
2904 m_ltrInterval = (uint32_t)(userFeatureData.i32Data);
2905
2906 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2907 MOS_UserFeature_ReadValue_ID(
2908 nullptr,
2909 __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_DISABLE_ID,
2910 &userFeatureData,
2911 m_osInterface->pOsContext);
2912 m_enableBrcLTR = (userFeatureData.i32Data) ? false : true;
2913 #endif
2914
2915 if (m_codecFunction != CODECHAL_FUNCTION_PAK)
2916 {
2917 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2918 MOS_UserFeature_ReadValue_ID(
2919 nullptr,
2920 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
2921 &userFeatureData,
2922 m_osInterface->pOsContext);
2923 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
2924
2925 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2926 MOS_UserFeature_ReadValue_ID(
2927 nullptr,
2928 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
2929 &userFeatureData,
2930 m_osInterface->pOsContext);
2931 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
2932
2933 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2934 MOS_UserFeature_ReadValue_ID(
2935 nullptr,
2936 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID,
2937 &userFeatureData,
2938 m_osInterface->pOsContext);
2939 // Keeping UHME by Default ON for Gen11
2940 m_32xMeSupported = (userFeatureData.i32Data) ? false : true;
2941
2942 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2943 MOS_UserFeature_ReadValue_ID(
2944 nullptr,
2945 __MEDIA_USER_FEATURE_VALUE_HEVC_NUM_THREADS_PER_LCU_ID,
2946 &userFeatureData,
2947 m_osInterface->pOsContext);
2948 m_totalNumThreadsPerLcu = (uint16_t)userFeatureData.i32Data;
2949
2950 if (m_totalNumThreadsPerLcu < m_minThreadsPerLcuB || m_totalNumThreadsPerLcu > m_maxThreadsPerLcuB)
2951 {
2952 return MOS_STATUS_INVALID_PARAMETER;
2953 }
2954 }
2955
2956
2957 if (m_frameWidth < 128 || m_frameHeight < 128)
2958 {
2959 m_16xMeSupported = false;
2960 m_32xMeSupported = false;
2961 }
2962
2963 else if (m_frameWidth < 512 || m_frameHeight < 512)
2964 {
2965 m_32xMeSupported = false;
2966 }
2967
2968 char stringData[MOS_USER_CONTROL_MAX_DATA_SIZE];
2969 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
2970 userFeatureData.StringData.pStringData = stringData;
2971 statusKey = MOS_UserFeature_ReadValue_ID(
2972 nullptr,
2973 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID,
2974 &userFeatureData,
2975 m_osInterface->pOsContext);
2976
2977 if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
2978 {
2979 MOS_SecureStrcpy(m_pakOnlyDataFolder,
2980 sizeof(m_pakOnlyDataFolder) / sizeof(m_pakOnlyDataFolder[0]),
2981 stringData);
2982
2983 uint32_t len = strlen(m_pakOnlyDataFolder);
2984 if (m_pakOnlyDataFolder[len - 1] == '\\')
2985 {
2986 m_pakOnlyDataFolder[len - 1] = 0;
2987 }
2988
2989 m_pakOnlyTest = true;
2990 // PAK only mode does not need to init any kernel
2991 }
2992
2993 return eStatus;
2994 }
2995
LoadCosts(uint8_t sliceType,uint8_t qp)2996 void CodechalEncHevcStateG11::LoadCosts(uint8_t sliceType, uint8_t qp)
2997 {
2998 if (sliceType >= CODECHAL_HEVC_NUM_SLICE_TYPES)
2999 {
3000 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid slice type");
3001 sliceType = CODECHAL_HEVC_I_SLICE;
3002 }
3003
3004 double qpScale = 0.60;
3005 int32_t qpMinus12 = qp - 12;
3006 double lambda = sqrt(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0));
3007 uint8_t lcuIdx = ((m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3) == 6) ? 1 : 0;
3008 m_lambdaRD = (uint16_t)(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0) * 4 + 0.5);
3009
3010 m_modeCostCre[LUTCREMODE_INTRA_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3011 m_modeCostCre[LUTCREMODE_INTRA_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3012 m_modeCostCre[LUTCREMODE_INTRA_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3013 m_modeCostCre[LUTCREMODE_INTRA_CHROMA] = CRECOST(lambda, LUTMODEBITS_INTRA_CHROMA, lcuIdx, sliceType);
3014 m_modeCostCre[LUTCREMODE_INTER_32X32] = CRECOST(lambda, LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3015 m_modeCostCre[LUTCREMODE_INTER_32X16] = CRECOST(lambda, LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3016 m_modeCostCre[LUTCREMODE_INTER_16X16] = CRECOST(lambda, LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3017 m_modeCostCre[LUTCREMODE_INTER_16X8] = CRECOST(lambda, LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3018 m_modeCostCre[LUTCREMODE_INTER_8X8] = CRECOST(lambda, LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3019 m_modeCostCre[LUTCREMODE_INTER_BIDIR] = CRECOST(lambda, LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3020 m_modeCostCre[LUTCREMODE_INTER_SKIP] = CRECOST(lambda, LUTMODEBITS_INTER_SKIP, lcuIdx, sliceType);
3021 m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3022 m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_16X16, lcuIdx, sliceType);
3023 m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3024 m_modeCostCre[LUTCREMODE_INTRA_NONPRED] = CRECOST(lambda, LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3025
3026 m_modeCostRde[LUTRDEMODE_INTRA_64X64] = RDEBITS62(LUTMODEBITS_INTRA_64X64, lcuIdx, sliceType);
3027 m_modeCostRde[LUTRDEMODE_INTRA_32X32] = RDEBITS62(LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType);
3028 m_modeCostRde[LUTRDEMODE_INTRA_16X16] = RDEBITS62(LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType);
3029 m_modeCostRde[LUTRDEMODE_INTRA_8X8] = RDEBITS62(LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType);
3030 m_modeCostRde[LUTRDEMODE_INTRA_NXN] = RDEBITS62(LUTMODEBITS_INTRA_NXN, lcuIdx, sliceType);
3031 m_modeCostRde[LUTRDEMODE_INTRA_MPM] = RDEBITS62(LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType);
3032 m_modeCostRde[LUTRDEMODE_INTRA_DC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_DC_32X32, lcuIdx, sliceType);
3033 m_modeCostRde[LUTRDEMODE_INTRA_DC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_DC_8X8, lcuIdx, sliceType);
3034 m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType);
3035 m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType);
3036 m_modeCostRde[LUTRDEMODE_INTER_BIDIR] = RDEBITS62(LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType);
3037 m_modeCostRde[LUTRDEMODE_INTER_REFID] = RDEBITS62(LUTMODEBITS_INTER_REFID, lcuIdx, sliceType);
3038 m_modeCostRde[LUTRDEMODE_SKIP_64X64] = RDEBITS62(LUTMODEBITS_SKIP_64X64, lcuIdx, sliceType);
3039 m_modeCostRde[LUTRDEMODE_SKIP_32X32] = RDEBITS62(LUTMODEBITS_SKIP_32X32, lcuIdx, sliceType);
3040 m_modeCostRde[LUTRDEMODE_SKIP_16X16] = RDEBITS62(LUTMODEBITS_SKIP_16X16, lcuIdx, sliceType);
3041 m_modeCostRde[LUTRDEMODE_SKIP_8X8] = RDEBITS62(LUTMODEBITS_SKIP_8X8, lcuIdx, sliceType);
3042 m_modeCostRde[LUTRDEMODE_MERGE_64X64] = RDEBITS62(LUTMODEBITS_MERGE_64X64, lcuIdx, sliceType);
3043 m_modeCostRde[LUTRDEMODE_MERGE_32X32] = RDEBITS62(LUTMODEBITS_MERGE_32X32, lcuIdx, sliceType);
3044 m_modeCostRde[LUTRDEMODE_MERGE_16X16] = RDEBITS62(LUTMODEBITS_MERGE_16X16, lcuIdx, sliceType);
3045 m_modeCostRde[LUTRDEMODE_MERGE_8X8] = RDEBITS62(LUTMODEBITS_MERGE_8X8, lcuIdx, sliceType);
3046 m_modeCostRde[LUTRDEMODE_INTER_32X32] = RDEBITS62(LUTMODEBITS_INTER_32X32, lcuIdx, sliceType);
3047 m_modeCostRde[LUTRDEMODE_INTER_32X16] = RDEBITS62(LUTMODEBITS_INTER_32X16, lcuIdx, sliceType);
3048 m_modeCostRde[LUTRDEMODE_INTER_16X16] = RDEBITS62(LUTMODEBITS_INTER_16X16, lcuIdx, sliceType);
3049 m_modeCostRde[LUTRDEMODE_INTER_16X8] = RDEBITS62(LUTMODEBITS_INTER_16X8, lcuIdx, sliceType);
3050 m_modeCostRde[LUTRDEMODE_INTER_8X8] = RDEBITS62(LUTMODEBITS_INTER_8X8, lcuIdx, sliceType);
3051 m_modeCostRde[LUTRDEMODE_TU_DEPTH_0] = RDEBITS62(LUTMODEBITS_TU_DEPTH_0, lcuIdx, sliceType);
3052 m_modeCostRde[LUTRDEMODE_TU_DEPTH_1] = RDEBITS62(LUTMODEBITS_TU_DEPTH_1, lcuIdx, sliceType);
3053
3054 for (uint8_t i = 0; i < 8; i++)
3055 {
3056 m_modeCostRde[LUTRDEMODE_CBF + i] = RDEBITS62(LUTMODEBITS_CBF + i, lcuIdx, sliceType);
3057 }
3058 }
3059
// ------------------------------------------------------------------------------
//| Purpose:    Set up the CURBE for the HEVC MbEnc B kernels
//| Return:     MOS_STATUS (MOS_STATUS_SUCCESS on success, otherwise a failure status)
// ------------------------------------------------------------------------------
SetCurbeMbEncKernel()3064 MOS_STATUS CodechalEncHevcStateG11::SetCurbeMbEncKernel()
3065 {
3066 uint32_t curIdx = m_currRecycledBufIdx;
3067 MOS_LOCK_PARAMS lockFlags;
3068 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3069
3070 uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3; // Map TU 1,4,6 to 0,1,2
3071
3072 // Initialize the CURBE data
3073 MBENC_CURBE curbe;
3074
3075 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3076 {
3077 if(m_encodeParams.bMbQpDataEnabled)
3078 {
3079 curbe.QPType = QP_TYPE_CU_LEVEL; // !< Even though CQP mode, as mbqpbuffer surface is updated with Application Qp map
3080 // !< QP type should be set to QP_TYPE_CU_LEVEL for mbenc kernel to consider this surface.
3081 }
3082 else
3083 curbe.QPType = QP_TYPE_CONSTANT;
3084 curbe.ROIEnable = m_hevcPicParams->NumROI ? true : false;
3085 }
3086 else
3087 {
3088 curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
3089 }
3090
3091 // TU based settings
3092 curbe.EnableCu64Check = m_tuSettings[EnableCu64CheckTuParam][tuMapping];
3093 curbe.MaxNumIMESearchCenter = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];
3094 curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping];
3095 curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping];
3096 curbe.Dynamic64Order = m_tuSettings[Dynamic64OrderTuParam][tuMapping];
3097 curbe.DynamicOrderTh = m_tuSettings[DynamicOrderThTuParam][tuMapping];
3098 curbe.Dynamic64Enable = m_tuSettings[Dynamic64EnableTuParam][tuMapping];
3099 curbe.Dynamic64Th = m_tuSettings[Dynamic64ThTuParam][tuMapping];
3100 curbe.IncreaseExitThresh = m_tuSettings[IncreaseExitThreshTuParam][tuMapping];
3101 curbe.IntraSpotCheck = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping];
3102 curbe.Fake32Enable = m_tuSettings[Fake32EnableTuParam][tuMapping];
3103 curbe.Dynamic64Min32 = m_tuSettings[Dynamic64Min32][tuMapping];
3104
3105 curbe.FrameWidthInSamples = m_frameWidth;
3106 curbe.FrameHeightInSamples = m_frameHeight;
3107
3108 curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3109 curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
3110 curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
3111 curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3112
3113 curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc;
3114
3115 curbe.TUDepthControl = curbe.MaxTransformDepthInter;
3116
3117 int32_t sliceQp = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3118 curbe.FrameQP = abs(sliceQp);
3119 curbe.FrameQPSign = (sliceQp > 0) ? 0 : 1;
3120
3121 #if 0 // no need in the optimized kernel because kernel does the table look-up
3122 LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp);
3123 curbe.DW4_ModeIntra32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_32X32];
3124 curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32];
3125
3126 curbe.DW5_ModeIntra16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_16X16];
3127 curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16];
3128 curbe.DW5_ModeIntra8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_8X8];
3129 curbe.DW5_ModeIntraNonDC8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8];
3130
3131 curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED];
3132
3133 curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA];
3134
3135 curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM];
3136
3137 curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0];
3138 curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1];
3139
3140 curbe.DW14_IntraTU4x4CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4];
3141 curbe.DW14_IntraTU8x8CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8];
3142 curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16];
3143 curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32];
3144 curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD;
3145 curbe.DW17_IntraNonDC8x8Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8];
3146 curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32];
3147 #endif
3148
3149 curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1;
3150 curbe.NumofRowTile = m_hevcPicParams->num_tile_rows_minus1 + 1;
3151 curbe.HMEFlag = m_hmeSupported ? 3 : 0;
3152
3153 curbe.MaxRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3154 curbe.MaxRefIdxL1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1;
3155 curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
3156
3157 // Check whether Last Frame is I frame or not
3158 if (m_frameNum == 0 || m_picHeightInMb == I_TYPE || (m_frameNum && m_lastPictureCodingType==I_TYPE))
3159 {
3160 // This is the flag to notify kernel not to use the history buffer
3161 curbe.LastFrameIsIntra = true;
3162 }
3163 else
3164 {
3165 curbe.LastFrameIsIntra = false;
3166 }
3167
3168 curbe.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
3169 curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
3170 curbe.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag;
3171 curbe.theSameRefList = m_sameRefList;
3172 curbe.IsLowDelay = m_lowDelay;
3173 curbe.NumRefIdxL0 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
3174 curbe.NumRefIdxL1 = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? 0 : (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1);
3175 if (m_hevcSeqParams->TargetUsage == 1)
3176 {
3177 // MaxNumMergeCand C Model uses 4 for TU1,
3178 // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3179 curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4);
3180 }
3181 else
3182 {
3183 // MaxNumMergeCand C Model uses 2 for TU4 and TU7,
3184 // for quality consideration, make sure not larger than the value from App as it will be used in PAK
3185 curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2);
3186 }
3187
3188 int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = { 0 }, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = {0};
3189 curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
3190 curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
3191 curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
3192 curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);
3193
3194 curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
3195 curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
3196 curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
3197 curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);
3198
3199 curbe.RefFrameWinHeight = m_frameHeight;
3200 curbe.RefFrameWinWidth = m_frameWidth;
3201
3202 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::GetRoundingIntraInterToUse());
3203
3204 curbe.RoundingInter = (m_roundingInterInUse + 1) << 4; // Should be an input from par in the cmodel (slice state)
3205 curbe.RoundingIntra = (m_roundingIntraInUse + 1) << 4; // Should be an input from par in the cmodel (slice state)
3206 curbe.RDEQuantRoundValue = (m_roundingInterInUse + 1) << 4;
3207
3208 uint32_t gopB = m_hevcSeqParams->GopRefDist;
3209
3210 curbe.CostScalingForRA = 1; // default setting
3211
3212 // get the min distance between current pic and ref pics
3213 uint32_t minPocDist = 255;
3214 uint32_t costTableIndex = 0;
3215
3216 if (curbe.SliceType == CODECHAL_ENCODE_HEVC_B_SLICE)
3217 {
3218 if (curbe.CostScalingForRA == 1)
3219 {
3220 for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++)
3221 {
3222 if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist)
3223 minPocDist = abs(tbRefListL0[ref]);
3224 }
3225 for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++)
3226 {
3227 if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist)
3228 minPocDist = abs(tbRefListL1[ref]);
3229 }
3230
3231 if (gopB == 4)
3232 {
3233 costTableIndex = minPocDist;
3234 if (minPocDist == 4)
3235 costTableIndex -= 1;
3236 }
3237 if (gopB == 8)
3238 {
3239 costTableIndex = minPocDist + 3;
3240 if (minPocDist == 4)
3241 costTableIndex -= 1;
3242 if (minPocDist == 8)
3243 costTableIndex -= 4;
3244 }
3245 }
3246 }
3247 else if (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE)
3248 {
3249 costTableIndex = 8;
3250 }
3251 else
3252 {
3253 costTableIndex = 9;
3254 }
3255
3256 curbe.CostTableIndex = costTableIndex;
3257
3258 // the following fields are needed by the new optimized kernel in v052417
3259 curbe.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
3260 curbe.MaxIntraRdeIter = 1;
3261 curbe.CornerNeighborPixel = 0;
3262 curbe.IntraNeighborAvailFlags = 0;
3263 curbe.SubPelMode = 3; // qual-pel search
3264 curbe.InterSADMeasure = 2; // Haar transform
3265 curbe.IntraSADMeasure = 2; // Haar transform
3266 curbe.IntraPrediction = 0; // enable 32x32, 16x16, and 8x8 luma intra prediction
3267 curbe.RefIDCostMode = 1; // 0: AVC and 1: linear method
3268 curbe.TUBasedCostSetting = 0;
3269 curbe.ConcurrentGroupNum = m_numberConcurrentGroup;
3270 curbe.NumofUnitInWaveFront = m_numWavefrontInOneRegion;
3271 curbe.LoadBalenceEnable = 0; // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe
3272 curbe.ThreadNumber = MOS_MIN(2, m_numberEncKernelSubThread);
3273 curbe.Pic_init_qp_B = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3274 curbe.Pic_init_qp_P = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3275 curbe.Pic_init_qp_I = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
3276 curbe.WaveFrontSplitsEnable = (m_numberConcurrentGroup == 1) ? false : true;
3277 curbe.SuperHME = m_16xMeSupported;
3278 curbe.UltraHME = m_32xMeSupported;
3279 curbe.PerBFrameQPOffset = 0;
3280
3281 switch (m_hevcSeqParams->TargetUsage)
3282 {
3283 case 1:
3284 curbe.Degree45 = 0;
3285 curbe.Break12Dependency = 0;
3286 curbe.DisableTemporal16and8 = 0;
3287 break;
3288 case 4:
3289 curbe.Degree45 = 1;
3290 curbe.Break12Dependency = 1;
3291 curbe.DisableTemporal16and8 = 0;
3292 break;
3293 default:
3294 curbe.Degree45 = 1;
3295 curbe.Break12Dependency = 1;
3296 curbe.DisableTemporal16and8 = 1;
3297 break;
3298 }
3299
3300 curbe.LongTermReferenceFlags_L0 = 0;
3301 for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++)
3302 {
3303 curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3304 }
3305 curbe.LongTermReferenceFlags_L1 = 0;
3306 for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++)
3307 {
3308 curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
3309 }
3310
3311 curbe.Stepping = 0;
3312 curbe.Cu64SkipCheckOnly = 0;
3313 curbe.Cu642Nx2NCheckOnly = 0;
3314 curbe.EnableCu64AmpCheck = 1;
3315 curbe.IntraSpeedMode = 0; // 35 mode
3316 curbe.DisableIntraNxN = 0;
3317
3318 #if 0 //needed only when using A stepping on simu/emu
3319 curbe.Stepping = 1;
3320 curbe.TUDepthControl = 1;
3321 curbe.MaxTransformDepthInter = 1;
3322 curbe.MaxTransformDepthIntra = 0;
3323 curbe.Cu64SkipCheckOnly = 0;
3324 curbe.Cu642Nx2NCheckOnly = 1;
3325 curbe.EnableCu64AmpCheck = 0;
3326 curbe.DisableIntraNxN = 1;
3327 curbe.MaxNumMergeCand = 1;
3328 #endif
3329
3330 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3331 lockFlags.WriteOnly = 1;
3332 auto buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
3333 m_osInterface,
3334 &m_encBCombinedBuffer1[curIdx].sResource,
3335 &lockFlags);
3336 CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
3337
3338 if(curbe.Degree45)
3339 {
3340 MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
3341 }
3342 buf->Curbe = curbe;
3343
3344 m_osInterface->pfnUnlockResource(
3345 m_osInterface,
3346 &m_encBCombinedBuffer1[curIdx].sResource);
3347
3348 if(m_initEncConstTable)
3349 {
3350 // Initialize the Enc Constant Table surface
3351 MOS_LOCK_PARAMS lockFlags;
3352 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3353 lockFlags.WriteOnly = 1;
3354
3355 auto data = (uint8_t*)m_osInterface->pfnLockResource(
3356 m_osInterface,
3357 &m_encConstantTableForB.sResource,
3358 &lockFlags);
3359 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
3360
3361 if (m_isMaxLcu64)
3362 {
3363 MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize,
3364 (const void*)m_encLcu64ConstantDataLut, sizeof(m_encLcu64ConstantDataLut));
3365 }
3366 else
3367 {
3368 MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize,
3369 (const void*)m_encLcu32ConstantDataLut, sizeof(m_encLcu32ConstantDataLut));
3370 }
3371
3372 m_osInterface->pfnUnlockResource(
3373 m_osInterface,
3374 &m_encConstantTableForB.sResource);
3375 m_initEncConstTable = false;
3376 }
3377
3378 // binding table index
3379 MBENC_COMBINED_BTI params;
3380 if (m_isMaxLcu64)
3381 {
3382 for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3383 {
3384 params.BTI_LCU64.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3385 params.BTI_LCU64.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3386 params.BTI_LCU64.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3387 params.BTI_LCU64.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y;
3388 params.BTI_LCU64.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3389 params.BTI_LCU64.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD;
3390 params.BTI_LCU64.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ;
3391 params.BTI_LCU64.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD;
3392 params.BTI_LCU64.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD;
3393 params.BTI_LCU64.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA;
3394 params.BTI_LCU64.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3395 params.BTI_LCU64.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3396 params.BTI_LCU64.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3397 params.BTI_LCU64.VME2XInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
3398 }
3399 params.BTI_LCU64.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE;
3400 params.BTI_LCU64.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1;
3401 params.BTI_LCU64.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2;
3402 params.BTI_LCU64.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3;
3403 params.BTI_LCU64.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3404 params.BTI_LCU64.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3405 }
3406 else
3407 {
3408 for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++)
3409 {
3410 params.BTI_LCU32.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3411 params.BTI_LCU32.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3412 params.BTI_LCU32.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3413 params.BTI_LCU32.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y;
3414 params.BTI_LCU32.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
3415 params.BTI_LCU32.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD;
3416 params.BTI_LCU32.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ;
3417 params.BTI_LCU32.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD;
3418 params.BTI_LCU32.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD;
3419 params.BTI_LCU32.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA;
3420 params.BTI_LCU32.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
3421 params.BTI_LCU32.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
3422 params.BTI_LCU32.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
3423 }
3424 params.BTI_LCU32.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE;
3425 params.BTI_LCU32.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1;
3426 params.BTI_LCU32.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2;
3427 params.BTI_LCU32.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3;
3428 params.BTI_LCU32.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE;
3429 params.BTI_LCU32.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA;
3430 }
3431
3432 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
3433 PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
3434 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3435 ¶ms,
3436 kernelState->dwCurbeOffset,
3437 sizeof(params)));
3438
3439 return eStatus;
3440 }
3441
3442 // ------------------------------------------------------------------------------
3443 //| Purpose: Setup curbe for HEVC BrcInitReset Kernel
3444 //| Return: N/A
3445 //------------------------------------------------------------------------------
SetCurbeBrcInitReset(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)3446 MOS_STATUS CodechalEncHevcStateG11::SetCurbeBrcInitReset(
3447 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
3448 {
3449 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3450
3451 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
3452
3453 if (brcKrnIdx != CODECHAL_HEVC_BRC_INIT && brcKrnIdx != CODECHAL_HEVC_BRC_RESET)
3454 {
3455 CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
3456 return MOS_STATUS_INVALID_PARAMETER;
3457 }
3458
3459 // Initialize the CURBE data
3460 BRC_INITRESET_CURBE curbe = m_brcInitResetCurbeInit;
3461
3462 uint32_t profileLevelMaxFrame = GetProfileLevelMaxFrameSize();
3463
3464 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
3465 m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
3466 m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3467 {
3468 if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
3469 {
3470 CODECHAL_ENCODE_ASSERTMESSAGE("Initial VBV Buffer Fullness is zero\n");
3471 return MOS_STATUS_INVALID_PARAMETER;
3472 }
3473
3474 if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
3475 {
3476 CODECHAL_ENCODE_ASSERTMESSAGE("VBV buffer size in bits is zero\n");
3477 return MOS_STATUS_INVALID_PARAMETER;
3478 }
3479 }
3480
3481 curbe.DW0_ProfileLevelMaxFrame = profileLevelMaxFrame;
3482 curbe.DW1_InitBufFull = m_hevcSeqParams->InitVBVBufferFullnessInBit;
3483 curbe.DW2_BufSize = m_hevcSeqParams->VBVBufferSizeInBit;
3484 curbe.DW3_TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; //DDI in Kbits
3485 curbe.DW4_MaximumBitRate = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
3486 curbe.DW5_MinimumBitRate = 0;
3487 curbe.DW6_FrameRateM = m_hevcSeqParams->FrameRate.Numerator;
3488 curbe.DW7_FrameRateD = m_hevcSeqParams->FrameRate.Denominator;
3489 curbe.DW8_BRCFlag = BRCINIT_IGNORE_PICTURE_HEADER_SIZE; // always ignore the picture header size set in BRC Update curbe
3490 if (m_hevcPicParams->NumROI)
3491 {
3492 curbe.DW8_BRCFlag |= BRCINIT_DISABLE_MBBRC; // BRC ROI need disable MBBRC logic in LcuBrc Kernel
3493 }
3494 else
3495 {
3496 curbe.DW8_BRCFlag |= (m_lcuBrcEnabled) ? 0 : BRCINIT_DISABLE_MBBRC;
3497 }
3498 curbe.DW8_BRCFlag |= (m_brcEnabled && m_numPipe > 1) ? BRCINIT_USEHUCBRC : 0;
3499
3500 // For non-ICQ, ACQP Buffer always set to 1
3501 curbe.DW25_ACQPBuffer = 1;
3502
3503 curbe.DW25_SlidingWindowSize = m_slidingWindowSize;
3504
3505 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
3506 {
3507 curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3508 curbe.DW8_BRCFlag |= BRCINIT_ISCBR;
3509 }
3510 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
3511 {
3512 if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3513 {
3514 curbe.DW4_MaximumBitRate = 2 * curbe.DW3_TargetBitRate;
3515 }
3516 curbe.DW8_BRCFlag |= BRCINIT_ISVBR;
3517 }
3518 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3519 {
3520 curbe.DW8_BRCFlag |= BRCINIT_ISAVBR;
3521 // For AVBR, max bitrate = target bitrate,
3522 curbe.DW3_TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; //DDI in Kbits
3523 curbe.DW4_MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3524 }
3525 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
3526 {
3527 curbe.DW8_BRCFlag |= BRCINIT_ISICQ;
3528 curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3529 }
3530 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
3531 {
3532 curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
3533 curbe.DW8_BRCFlag |= BRCINIT_ISVCM;
3534 }
3535 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3536 {
3537 curbe.DW8_BRCFlag = BRCINIT_ISCQP;
3538 }
3539 else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR)
3540 {
3541 if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
3542 {
3543 curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate; // Use max bit rate for HRD compliance
3544 }
3545 curbe.DW8_BRCFlag = curbe.DW8_BRCFlag | BRCINIT_ISQVBR | BRCINIT_ISVBR; // We need to make sure that VBR is used for QP determination.
3546 // use ICQQualityFactor to determine the larger Qp for each MB
3547 curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
3548 }
3549
3550 curbe.DW9_FrameWidth = m_oriFrameWidth;
3551 curbe.DW10_FrameHeight = m_oriFrameHeight;
3552 curbe.DW10_AVBRAccuracy = m_usAvbrAccuracy;
3553 curbe.DW11_AVBRConvergence = m_usAvbrConvergence;
3554 curbe.DW12_NumberSlice = m_numSlices;
3555
3556 /**********************************************************************
3557 In case of non-HB/BPyramid Structure
3558 BRC_Param_A = GopP
3559 BRC_Param_B = GopB
3560 In case of HB/BPyramid GOP Structure
3561 BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
3562 BRC Parameters set as follows as per CModel equation
3563 ***********************************************************************/
3564 // BPyramid GOP
3565 m_hevcSeqParams->GopRefDist = m_hevcSeqParams->GopRefDist == 0 ? 1 : m_hevcSeqParams->GopRefDist;
3566 if (m_hevcSeqParams->NumOfBInGop[1] != 0 || m_hevcSeqParams->NumOfBInGop[2] != 0)
3567 {
3568 curbe.DW8_BRCGopP = ((m_hevcSeqParams->GopPicSize) / m_hevcSeqParams->GopRefDist);
3569 curbe.DW9_BRCGopB = curbe.DW8_BRCGopP;
3570 curbe.DW13_BRCGopB1 = curbe.DW8_BRCGopP * 2;
3571 curbe.DW14_BRCGopB2 = ((m_hevcSeqParams->GopPicSize) - (curbe.DW8_BRCGopP) - (curbe.DW13_BRCGopB1) - (curbe.DW9_BRCGopB));
3572 // B1 Level GOP
3573 if (m_hevcSeqParams->NumOfBInGop[2] == 0)
3574 {
3575 curbe.DW14_MaxBRCLevel = 3;
3576 }
3577 // B2 Level GOP
3578 else
3579 {
3580 curbe.DW14_MaxBRCLevel = 4;
3581 }
3582 }
3583 // For Regular GOP - No BPyramid
3584 else
3585 {
3586 curbe.DW14_MaxBRCLevel = 1;
3587 curbe.DW8_BRCGopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
3588 curbe.DW9_BRCGopB = m_hevcSeqParams->GopPicSize - 1 - curbe.DW8_BRCGopP;
3589 }
3590
3591 // Set dynamic thresholds
3592 double inputBitsPerFrame = (double)((double)curbe.DW4_MaximumBitRate * (double)curbe.DW7_FrameRateD);
3593 inputBitsPerFrame = (double)(inputBitsPerFrame / curbe.DW6_FrameRateM);
3594
3595 if (curbe.DW2_BufSize < (uint32_t)inputBitsPerFrame * 4)
3596 {
3597 curbe.DW2_BufSize = (uint32_t)inputBitsPerFrame * 4;
3598 }
3599
3600 if (curbe.DW1_InitBufFull == 0)
3601 {
3602 curbe.DW1_InitBufFull = 7 * curbe.DW2_BufSize / 8;
3603 }
3604 if (curbe.DW1_InitBufFull < (uint32_t)(inputBitsPerFrame * 2))
3605 {
3606 curbe.DW1_InitBufFull = (uint32_t)(inputBitsPerFrame * 2);
3607 }
3608 if (curbe.DW1_InitBufFull > curbe.DW2_BufSize)
3609 {
3610 curbe.DW1_InitBufFull = curbe.DW2_BufSize;
3611 }
3612
3613 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3614 {
3615 // For AVBR, Buffer size = 2*Bitrate, InitVBV = 0.75 * BufferSize
3616 curbe.DW2_BufSize = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
3617 curbe.DW1_InitBufFull = (uint32_t)(0.75 * curbe.DW2_BufSize);
3618 }
3619
3620
3621 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3622 {
3623 curbe.DW15_LongTermInterval = 0; // no LTR for low delay brc
3624 }
3625 else
3626 {
3627 curbe.DW15_LongTermInterval = (m_enableBrcLTR && m_ltrInterval) ? m_ltrInterval : m_enableBrcLTR ? HEVC_BRC_LONG_TERM_REFRENCE_FLAG : 0;
3628 }
3629
3630 double bpsRatio = ( (double) inputBitsPerFrame / ( (double)(curbe.DW2_BufSize) / 30));
3631 bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;
3632
3633 curbe.DW19_DeviationThreshold0_PBframe = (uint32_t)(-50 * pow(0.90, bpsRatio));
3634 curbe.DW19_DeviationThreshold1_PBframe = (uint32_t)(-50 * pow(0.66, bpsRatio));
3635 curbe.DW19_DeviationThreshold2_PBframe = (uint32_t)(-50 * pow(0.46, bpsRatio));
3636 curbe.DW19_DeviationThreshold3_PBframe = (uint32_t)(-50 * pow(0.3, bpsRatio));
3637
3638 curbe.DW20_DeviationThreshold4_PBframe = (uint32_t)(50 * pow(0.3, bpsRatio));
3639 curbe.DW20_DeviationThreshold5_PBframe = (uint32_t)(50 * pow(0.46, bpsRatio));
3640 curbe.DW20_DeviationThreshold6_PBframe = (uint32_t)(50 * pow(0.7, bpsRatio));
3641 curbe.DW20_DeviationThreshold7_PBframe = (uint32_t)(50 * pow(0.9, bpsRatio));
3642
3643 curbe.DW21_DeviationThreshold0_VBRcontrol = (uint32_t)(-50 * pow(0.9, bpsRatio));
3644 curbe.DW21_DeviationThreshold1_VBRcontrol = (uint32_t)(-50 * pow(0.7, bpsRatio));
3645 curbe.DW21_DeviationThreshold2_VBRcontrol = (uint32_t)(-50 * pow(0.5, bpsRatio));
3646 curbe.DW21_DeviationThreshold3_VBRcontrol = (uint32_t)(-50 * pow(0.3, bpsRatio));
3647
3648 curbe.DW22_DeviationThreshold4_VBRcontrol = (uint32_t)(100 * pow(0.4, bpsRatio));
3649 curbe.DW22_DeviationThreshold5_VBRcontrol = (uint32_t)(100 * pow(0.5, bpsRatio));
3650 curbe.DW22_DeviationThreshold6_VBRcontrol = (uint32_t)(100 * pow(0.75, bpsRatio));
3651 curbe.DW22_DeviationThreshold7_VBRcontrol = (uint32_t)(100 * pow(0.9, bpsRatio));
3652
3653 curbe.DW23_DeviationThreshold0_Iframe = (uint32_t)(-50 * pow(0.8, bpsRatio));
3654 curbe.DW23_DeviationThreshold1_Iframe = (uint32_t)(-50 * pow(0.6, bpsRatio));
3655 curbe.DW23_DeviationThreshold2_Iframe = (uint32_t)(-50 * pow(0.34, bpsRatio));
3656 curbe.DW23_DeviationThreshold3_Iframe = (uint32_t)(-50 * pow(0.2, bpsRatio));
3657
3658 curbe.DW24_DeviationThreshold4_Iframe = (uint32_t)(50 * pow(0.2, bpsRatio));
3659 curbe.DW24_DeviationThreshold5_Iframe = (uint32_t)(50 * pow(0.4, bpsRatio));
3660 curbe.DW24_DeviationThreshold6_Iframe = (uint32_t)(50 * pow(0.66, bpsRatio));
3661 curbe.DW24_DeviationThreshold7_Iframe = (uint32_t)(50 * pow(0.9, bpsRatio));
3662
3663 curbe.DW26_RandomAccess = (m_hevcSeqParams->HierarchicalFlag && !m_hevcSeqParams->LowDelayMode) ? true : false;
3664
3665 if (m_brcInit)
3666 {
3667 m_dBrcInitCurrentTargetBufFullInBits = curbe.DW1_InitBufFull;
3668 }
3669
3670 m_brcInitResetBufSizeInBits = curbe.DW2_BufSize;
3671 m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;
3672
3673 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
3674 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3675 &curbe,
3676 kernelState->dwCurbeOffset,
3677 sizeof(curbe)));
3678
3679 return eStatus;
3680 }
3681
3682 // ------------------------------------------------------------------------------
3683 //| Purpose: Setup curbe for HEVC BrcUpdate Kernel
3684 //| Return: N/A
3685 //------------------------------------------------------------------------------
SetCurbeBrcUpdate(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)3686 MOS_STATUS CodechalEncHevcStateG11::SetCurbeBrcUpdate(
3687 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
3688 {
3689 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3690
3691 if (brcKrnIdx != CODECHAL_HEVC_BRC_FRAME_UPDATE && brcKrnIdx != CODECHAL_HEVC_BRC_LCU_UPDATE)
3692 {
3693 CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not frame update or LCU update\n");
3694 return MOS_STATUS_INVALID_PARAMETER;
3695 }
3696
3697 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
3698
3699 // Initialize the CURBE data
3700 BRCUPDATE_CURBE curbe = m_brcUpdateCurbeInit;
3701
3702 curbe.DW5_TargetSize_Flag = 0;
3703
3704 if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits)
3705 {
3706 m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits;
3707 curbe.DW5_TargetSize_Flag = 1;
3708 }
3709
3710 if (m_numSkipFrames)
3711 {
3712 // pass num/size of skipped frames to update BRC
3713 curbe.DW6_NumSkippedFrames = m_numSkipFrames;
3714 curbe.DW15_SizeOfSkippedFrames = m_sizeSkipFrames;
3715
3716 // account for skipped frame in calculating CurrentTargetBufFullInBits
3717 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
3718 }
3719
3720 curbe.DW0_TargetSize = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
3721 curbe.DW1_FrameNumber = m_storeData - 1; // Check if we can remove this is unused (set to 0)
3722
3723 // BRC PAK statistic buffer from last frame, the encoded size includes header already.
3724 // in BRC Initreset kernel, curbe DW8_BRCFlag will always ignore picture header size, so no need to set picture header size here.
3725 curbe.DW2_PictureHeaderSize = 0;
3726
3727 curbe.DW5_CurrFrameBrcLevel = m_currFrameBrcLevel;
3728 curbe.DW5_MaxNumPAKs = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses();
3729
3730 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3731 {
3732 curbe.DW6_CqpValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
3733 }
3734 if (m_hevcPicParams->NumROI)
3735 {
3736 curbe.DW6_ROIEnable = m_brcEnabled ? false : true;
3737 curbe.DW6_BRCROIEnable = m_brcEnabled ? true : false;
3738 curbe.DW6_RoiRatio = CalculateROIRatio();
3739 }
3740 curbe.DW6_SlidingWindowEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
3741
3742 //for low delay brc
3743 curbe.DW6_LowDelayEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW);
3744 curbe.DW16_UserMaxFrameSize = GetProfileLevelMaxFrameSize();
3745
3746 curbe.DW14_ParallelMode = m_hevcSeqParams->ParallelBRC;
3747
3748 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
3749 {
3750 curbe.DW3_StartGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
3751 curbe.DW3_StartGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
3752 curbe.DW4_StartGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
3753 curbe.DW4_StartGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);
3754
3755 curbe.DW11_gRateRatioThreshold0 =
3756 (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40)));
3757 curbe.DW11_gRateRatioThreshold1 =
3758 (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75)));
3759 curbe.DW12_gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97)));
3760 curbe.DW12_gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100)));
3761 curbe.DW12_gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100)));
3762 curbe.DW12_gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100)));
3763 }
3764
3765 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
3766 {
3767 curbe.DW17_LongTerm_Current = 0; // no LTR for low delay brc
3768 }
3769 else
3770 {
3771 m_isFrameLTR = (CodecHal_PictureIsLongTermRef(m_currReconstructedPic));
3772 curbe.DW17_LongTerm_Current = (m_enableBrcLTR && m_isFrameLTR) ? 1 : 0;
3773 }
3774
3775 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
3776 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
3777 &curbe,
3778 kernelState->dwCurbeOffset,
3779 sizeof(curbe)));
3780
3781 return eStatus;
3782 }
3783
SendMbEncSurfacesKernel(PMOS_COMMAND_BUFFER cmdBuffer)3784 MOS_STATUS CodechalEncHevcStateG11::SendMbEncSurfacesKernel(
3785 PMOS_COMMAND_BUFFER cmdBuffer)
3786 {
3787 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3788
3789 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
3790 PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
3791
3792 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
3793 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = m_isMaxLcu64 ? &m_mbEncKernelBindingTable[MBENC_LCU64_KRNIDX] : &m_mbEncKernelBindingTable[MBENC_LCU32_KRNIDX];
3794
3795 PMOS_SURFACE inputSurface = m_rawSurfaceToEnc;
3796 uint32_t startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3797 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
3798
3799 // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map
3800 startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1;
3801 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
3802 &surfaceCodecParams,
3803 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
3804 MOS_BYTES_TO_DWORDS(m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize),
3805 0,
3806 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_BCOMBINED1_ENCODE].Value,
3807 bindingTable->dwBindingTableEntries[startBTI++],
3808 false));
3809
3810 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3811 m_hwInterface,
3812 cmdBuffer,
3813 &surfaceCodecParams,
3814 kernelState));
3815
3816 CODECHAL_DEBUG_TOOL(
3817 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3818 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
3819 CodechalDbgAttr::attrOutput,
3820 "Hevc_CombinedBuffer1",
3821 m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
3822 0,
3823 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
3824 );
3825
3826 // Combined 1D RAW buffer 2, which contains non fixed sizes of buffers
3827 startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2;
3828 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
3829 &surfaceCodecParams,
3830 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
3831 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
3832 0,
3833 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_BCOMBINED2_ENCODE].Value,
3834 bindingTable->dwBindingTableEntries[startBTI++],
3835 false));
3836 surfaceCodecParams.bRawSurface = true;
3837
3838 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3839 m_hwInterface,
3840 cmdBuffer,
3841 &surfaceCodecParams,
3842 kernelState));
3843
3844 CODECHAL_DEBUG_TOOL(
3845 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3846 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
3847 CodechalDbgAttr::attrOutput,
3848 "Hevc_CombinedBuffer2",
3849 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
3850 0,
3851 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
3852 );
3853 // VME surfaces
3854 startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0;
3855 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3856 &surfaceCodecParams,
3857 inputSurface,
3858 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
3859 bindingTable->dwBindingTableEntries[startBTI++]));
3860
3861 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3862 m_hwInterface,
3863 cmdBuffer,
3864 &surfaceCodecParams,
3865 kernelState));
3866
3867 for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
3868 {
3869 int32_t ll = 0;
3870 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
3871 if (!CodecHal_PictureIsInvalid(refPic) &&
3872 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
3873 {
3874 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
3875 PMOS_SURFACE refSurfacePtr;
3876 if (surface_idx == 0 && m_useWeightedSurfaceForL0)
3877 {
3878 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx);
3879 }
3880 else
3881 {
3882 refSurfacePtr = &m_refList[idx]->sRefBuffer;
3883 }
3884
3885 // Picture Y VME
3886 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3887 &surfaceCodecParams,
3888 refSurfacePtr,
3889 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3890 bindingTable->dwBindingTableEntries[startBTI++]));
3891
3892 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3893 m_hwInterface,
3894 cmdBuffer,
3895 &surfaceCodecParams,
3896 kernelState));
3897
3898 CODECHAL_DEBUG_TOOL(
3899 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
3900 std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
3901 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
3902 &m_refList[idx]->sRefBuffer,
3903 CodechalDbgAttr::attrReferenceSurfaces,
3904 refSurfName.data())));
3905 }
3906 else
3907 {
3908 // Providing Dummy surface as per VME requirement.
3909 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3910 &surfaceCodecParams,
3911 inputSurface,
3912 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3913 bindingTable->dwBindingTableEntries[startBTI++]));
3914
3915 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3916 m_hwInterface,
3917 cmdBuffer,
3918 &surfaceCodecParams,
3919 kernelState));
3920 }
3921
3922 ll = 1;
3923 refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
3924 if (!CodecHal_PictureIsInvalid(refPic) &&
3925 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
3926 {
3927 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
3928 PMOS_SURFACE refSurfacePtr;
3929 if (surface_idx == 0 && m_useWeightedSurfaceForL1)
3930 {
3931 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx);
3932 }
3933 else
3934 {
3935 refSurfacePtr = &m_refList[idx]->sRefBuffer;
3936 }
3937
3938 // Picture Y VME
3939 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3940 &surfaceCodecParams,
3941 refSurfacePtr,
3942 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3943 bindingTable->dwBindingTableEntries[startBTI++]));
3944
3945 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3946 m_hwInterface,
3947 cmdBuffer,
3948 &surfaceCodecParams,
3949 kernelState));
3950
3951 CODECHAL_DEBUG_TOOL(
3952 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
3953 std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
3954 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
3955 &m_refList[idx]->sRefBuffer,
3956 CodechalDbgAttr::attrReferenceSurfaces,
3957 refSurfName.data())));
3958 }
3959 else
3960 {
3961 // Providing Dummy surface as per VME requirement.
3962 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
3963 &surfaceCodecParams,
3964 inputSurface,
3965 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
3966 bindingTable->dwBindingTableEntries[startBTI++]));
3967
3968 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3969 m_hwInterface,
3970 cmdBuffer,
3971 &surfaceCodecParams,
3972 kernelState));
3973 }
3974 }
3975
3976 //Source Y and UV
3977 startBTI = MBENC_B_FRAME_CURR_Y;
3978 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
3979 &surfaceCodecParams,
3980 inputSurface,
3981 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
3982 bindingTable->dwBindingTableEntries[startBTI++],
3983 m_verticalLineStride,
3984 false));
3985
3986 surfaceCodecParams.bUseUVPlane = true;
3987
3988 surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI];
3989 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
3990 m_hwInterface,
3991 cmdBuffer,
3992 &surfaceCodecParams,
3993 kernelState));
3994
3995 CODECHAL_DEBUG_TOOL(
3996 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
3997 inputSurface,
3998 CodechalDbgAttr::attrEncodeRawInputSurface,
3999 "MbEnc_Input_SrcSurf")));
4000
4001 // Current Y with reconstructed boundary pixels
4002 startBTI = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX;
4003 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4004 &surfaceCodecParams,
4005 &m_currPicWithReconBoundaryPix,
4006 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4007 bindingTable->dwBindingTableEntries[startBTI],
4008 m_verticalLineStride,
4009 true));
4010
4011 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4012 m_hwInterface,
4013 cmdBuffer,
4014 &surfaceCodecParams,
4015 kernelState));
4016
4017 // Enc CU Record
4018 startBTI = MBENC_B_FRAME_ENC_CU_RECORD;
4019 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4020 &surfaceCodecParams,
4021 &m_intermediateCuRecordSurfaceLcu32,
4022 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_CU_RECORD_ENCODE].Value,
4023 bindingTable->dwBindingTableEntries[startBTI],
4024 0,
4025 true));
4026
4027 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4028 m_hwInterface,
4029 cmdBuffer,
4030 &surfaceCodecParams,
4031 kernelState));
4032
4033 // PAK object command surface
4034 startBTI = MBENC_B_FRAME_PAK_OBJ;
4035 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4036 &surfaceCodecParams,
4037 &m_resMbCodeSurface,
4038 MOS_BYTES_TO_DWORDS(m_mvOffset),
4039 0,
4040 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
4041 bindingTable->dwBindingTableEntries[startBTI],
4042 true));
4043
4044 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4045 m_hwInterface,
4046 cmdBuffer,
4047 &surfaceCodecParams,
4048 kernelState));
4049
4050 // CU packet for PAK surface
4051 startBTI = MBENC_B_FRAME_PAK_CU_RECORD;
4052 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4053 &surfaceCodecParams,
4054 &m_resMbCodeSurface,
4055 MOS_BYTES_TO_DWORDS(m_mbCodeSize - m_mvOffset),
4056 m_mvOffset,
4057 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_CU_PACKET_FOR_PAK_ENCODE].Value,
4058 bindingTable->dwBindingTableEntries[startBTI],
4059 true));
4060
4061 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4062 m_hwInterface,
4063 cmdBuffer,
4064 &surfaceCodecParams,
4065 kernelState));
4066
4067 //Software scoreboard surface
4068 startBTI = MBENC_B_FRAME_SW_SCOREBOARD;
4069 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4070 &surfaceCodecParams,
4071 m_swScoreboardState->GetCurSwScoreboardSurface(),
4072 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_SOFTWARE_SCOREBOARD_ENCODE].Value,
4073 bindingTable->dwBindingTableEntries[startBTI],
4074 m_verticalLineStride,
4075 true));
4076
4077 surfaceCodecParams.bUse32UINTSurfaceFormat = true;
4078 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4079 m_hwInterface,
4080 cmdBuffer,
4081 &surfaceCodecParams,
4082 kernelState));
4083
4084 // Scratch surface for Internal Use Only
4085 startBTI = MBENC_B_FRAME_SCRATCH_SURFACE;
4086 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4087 &surfaceCodecParams,
4088 &m_scratchSurface,
4089 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_SCRATCH_ENCODE].Value,
4090 bindingTable->dwBindingTableEntries[startBTI],
4091 m_verticalLineStride,
4092 true));
4093
4094 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4095 m_hwInterface,
4096 cmdBuffer,
4097 &surfaceCodecParams,
4098 kernelState));
4099
4100 // CU 16x16 QP data input surface
4101 startBTI = MBENC_B_FRAME_CU_QP_DATA;
4102 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4103 &surfaceCodecParams,
4104 &m_brcBuffers.sBrcMbQpBuffer,
4105 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
4106 bindingTable->dwBindingTableEntries[startBTI],
4107 m_verticalLineStride,
4108 false));
4109
4110 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4111 m_hwInterface,
4112 cmdBuffer,
4113 &surfaceCodecParams,
4114 kernelState));
4115
4116 // Lcu level data input
4117 startBTI = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT;
4118 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4119 &surfaceCodecParams,
4120 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
4121 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_LCU_LEVEL_DATA_ENCODE].Value,
4122 bindingTable->dwBindingTableEntries[startBTI],
4123 m_verticalLineStride,
4124 false));
4125
4126 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4127 m_hwInterface,
4128 cmdBuffer,
4129 &surfaceCodecParams,
4130 kernelState));
4131
4132 // Enc B 32x32 Constant Table surface
4133 startBTI = MBENC_B_FRAME_ENC_CONST_TABLE;
4134 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4135 &surfaceCodecParams,
4136 &m_encConstantTableForB.sResource,
4137 MOS_BYTES_TO_DWORDS(m_encConstantTableForB.dwSize),
4138 0,
4139 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_CONSTANT_TABLE_ENCODE].Value,
4140 bindingTable->dwBindingTableEntries[startBTI],
4141 false));
4142
4143 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4144 m_hwInterface,
4145 cmdBuffer,
4146 &surfaceCodecParams,
4147 kernelState));
4148
4149 // Colocated CU Motion Vector Data surface
4150 startBTI = MBENC_B_FRAME_COLOCATED_CU_MV_DATA;
4151 uint8_t mbCodeIdxForTempMVP = 0xFF;
4152 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
4153 {
4154 uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
4155
4156 mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
4157 }
4158
4159 if(m_pictureCodingType == I_TYPE)
4160 {
4161 // No temoporal MVP in the I frame
4162 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4163 }
4164 else
4165 {
4166 if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
4167 {
4168 // Temporal reference MV index is invalid and so disable the temporal MVP
4169 CODECHAL_ENCODE_ASSERT(false);
4170 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
4171 }
4172 }
4173
4174 if (mbCodeIdxForTempMVP == 0xFF)
4175 {
4176 startBTI++;
4177 }
4178 else
4179 {
4180 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4181 &surfaceCodecParams,
4182 m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP),
4183 MOS_BYTES_TO_DWORDS(m_sizeOfMvTemporalBuffer),
4184 0,
4185 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ENC_MV_TEMPORAL_BUFFER_ENCODE].Value,
4186 bindingTable->dwBindingTableEntries[startBTI++],
4187 false));
4188
4189 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4190 m_hwInterface,
4191 cmdBuffer,
4192 &surfaceCodecParams,
4193 kernelState));
4194 }
4195
4196 startBTI = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA;
4197
4198 // HME motion predictor data
4199 if (m_hmeEnabled)
4200 {
4201 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4202 &surfaceCodecParams,
4203 m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer),
4204 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value,
4205 bindingTable->dwBindingTableEntries[startBTI++],
4206 m_verticalLineStride,
4207 false));
4208
4209 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4210 m_hwInterface,
4211 cmdBuffer,
4212 &surfaceCodecParams,
4213 kernelState));
4214 }
4215 else
4216 {
4217 startBTI++;
4218 }
4219
4220 // Brc Combined Enc parameter surface
4221 startBTI = MBENC_B_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
4222 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4223 &surfaceCodecParams,
4224 &m_brcInputForEncKernelBuffer->sResource,
4225 MOS_BYTES_TO_DWORDS(HEVC_FRAMEBRC_BUF_CONST_SIZE),
4226 0,
4227 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_COMBINED_ENC_ENCODE].Value,
4228 bindingTable->dwBindingTableEntries[startBTI++],
4229 false));
4230
4231 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4232 m_hwInterface,
4233 cmdBuffer,
4234 &surfaceCodecParams,
4235 kernelState));
4236
4237 startBTI = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR;
4238 if (m_isMaxLcu64)
4239 {
4240 PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
4241
4242 //VME 2X Inter prediction surface for current frame
4243 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4244 &surfaceCodecParams,
4245 currScaledSurface2x,
4246 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
4247 bindingTable->dwBindingTableEntries[startBTI++]));
4248
4249 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4250 m_hwInterface,
4251 cmdBuffer,
4252 &surfaceCodecParams,
4253 kernelState));
4254
4255 CODECHAL_DEBUG_TOOL(
4256 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4257 currScaledSurface2x,
4258 CodechalDbgAttr::attrReferenceSurfaces,
4259 "2xScaledSurf")));
4260
4261 // RefFrame's 2x DS surface
4262 for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
4263 {
4264 int32_t ll = 0;
4265 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4266 if (!CodecHal_PictureIsInvalid(refPic) &&
4267 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4268 {
4269 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4270
4271 // Picture Y VME
4272 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4273 &surfaceCodecParams,
4274 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4275 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4276 bindingTable->dwBindingTableEntries[startBTI++]));
4277
4278 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4279 m_hwInterface,
4280 cmdBuffer,
4281 &surfaceCodecParams,
4282 kernelState));
4283
4284 CODECHAL_DEBUG_TOOL(
4285 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4286 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4287 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4288 CodechalDbgAttr::attrReferenceSurfaces,
4289 "Ref2xScaledSurf")));
4290 }
4291 else
4292 {
4293 // Providing Dummy surface as per VME requirement.
4294 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4295 &surfaceCodecParams,
4296 currScaledSurface2x,
4297 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4298 bindingTable->dwBindingTableEntries[startBTI++]));
4299
4300 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4301 m_hwInterface,
4302 cmdBuffer,
4303 &surfaceCodecParams,
4304 kernelState));
4305 }
4306
4307 ll = 1;
4308 refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
4309 if (!CodecHal_PictureIsInvalid(refPic) &&
4310 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4311 {
4312 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4313
4314 // Picture Y VME
4315 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4316 &surfaceCodecParams,
4317 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4318 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4319 bindingTable->dwBindingTableEntries[startBTI++]));
4320
4321 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4322 m_hwInterface,
4323 cmdBuffer,
4324 &surfaceCodecParams,
4325 kernelState));
4326
4327 CODECHAL_DEBUG_TOOL(
4328 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
4329 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
4330 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
4331 CodechalDbgAttr::attrReferenceSurfaces,
4332 "Ref2xScaledSurf")));
4333 }
4334 else
4335 {
4336 // Providing Dummy surface as per VME requirement.
4337 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
4338 &surfaceCodecParams,
4339 currScaledSurface2x,
4340 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
4341 bindingTable->dwBindingTableEntries[startBTI++]));
4342
4343 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4344 m_hwInterface,
4345 cmdBuffer,
4346 &surfaceCodecParams,
4347 kernelState));
4348 }
4349 }
4350 }
4351
4352 // Kernel debug surface
4353 startBTI = MBENC_B_FRAME_DEBUG_SURFACE;
4354 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++, startBTI++)
4355 {
4356 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4357 &surfaceCodecParams,
4358 &m_debugSurface[i].sResource,
4359 MOS_BYTES_TO_DWORDS(m_debugSurface[i].dwSize),
4360 0,
4361 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_DEBUG_ENCODE].Value,
4362 bindingTable->dwBindingTableEntries[startBTI],
4363 false));
4364
4365 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4366 m_hwInterface,
4367 cmdBuffer,
4368 &surfaceCodecParams,
4369 kernelState));
4370 }
4371
4372 return eStatus;
4373 }
4374
SendBrcInitResetSurfaces(PMOS_COMMAND_BUFFER cmdBuffer,CODECHAL_HEVC_BRC_KRNIDX krnIdx)4375 MOS_STATUS CodechalEncHevcStateG11::SendBrcInitResetSurfaces(
4376 PMOS_COMMAND_BUFFER cmdBuffer,
4377 CODECHAL_HEVC_BRC_KRNIDX krnIdx)
4378 {
4379 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4380
4381 if (krnIdx != CODECHAL_HEVC_BRC_INIT && krnIdx != CODECHAL_HEVC_BRC_RESET)
4382 {
4383 CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n");
4384 return MOS_STATUS_INVALID_PARAMETER;
4385 }
4386
4387 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[krnIdx];
4388 uint32_t startBTI = 0;
4389 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4390 // BRC History Buffer
4391 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4392 &surfaceCodecParams,
4393 &m_brcBuffers.resBrcHistoryBuffer,
4394 MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
4395 0,
4396 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_HISTORY_ENCODE].Value,
4397 bindingTable->dwBindingTableEntries[startBTI++],
4398 true));
4399
4400 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[krnIdx];
4401 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4402 m_hwInterface,
4403 cmdBuffer,
4404 &surfaceCodecParams,
4405 kernelState));
4406
4407 // BRC Distortion surface
4408 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4409 &surfaceCodecParams,
4410 m_brcDistortion,
4411 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
4412 bindingTable->dwBindingTableEntries[startBTI++],
4413 0,
4414 true));
4415 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4416 m_hwInterface,
4417 cmdBuffer,
4418 &surfaceCodecParams,
4419 kernelState));
4420
4421 return eStatus;
4422 }
4423
SetupBrcConstantTable(PMOS_SURFACE brcConstantData)4424 MOS_STATUS CodechalEncHevcStateG11::SetupBrcConstantTable(
4425 PMOS_SURFACE brcConstantData)
4426 {
4427 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4428
4429 CODECHAL_ENCODE_FUNCTION_ENTER;
4430
4431 MOS_LOCK_PARAMS lockFlags;
4432 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4433 lockFlags.WriteOnly = 1;
4434 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags);
4435 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4436
4437 uint32_t size = brcConstantData->dwHeight * brcConstantData->dwWidth;
4438 // 576-byte of Qp adjust table
4439 MOS_SecureMemcpy(data, size, g_cInit_HEVC_BRC_QP_ADJUST, sizeof(g_cInit_HEVC_BRC_QP_ADJUST));
4440 data += sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
4441 size -= sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
4442
4443 //lambda and mode cost
4444 if (m_isMaxLcu64)
4445 {
4446 MOS_SecureMemcpy(data, size, m_brcLcu64x64LambdaModeCostInit, sizeof(m_brcLcu64x64LambdaModeCostInit));
4447 }
4448 else
4449 {
4450 MOS_SecureMemcpy(data, size, m_brcLcu32x32LambdaModeCostInit, sizeof(m_brcLcu32x32LambdaModeCostInit));
4451 }
4452 data += m_brcLambdaModeCostTableSize;
4453 size -= m_brcLambdaModeCostTableSize;
4454
4455 m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);
4456
4457 return eStatus;
4458 }
4459
SendBrcFrameUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)4460 MOS_STATUS CodechalEncHevcStateG11::SendBrcFrameUpdateSurfaces(
4461 PMOS_COMMAND_BUFFER cmdBuffer)
4462 {
4463 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4464
4465 // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
4466 PMOS_RESOURCE brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
4467 MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
4468 mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams;
4469 mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams;
4470 mhwHevcPicState.bUseVDEnc = m_vdencEnabled ? 1 : 0;
4471 mhwHevcPicState.sseEnabledInVmeEncode = m_sseEnabled;
4472 mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses();
4473 mhwHevcPicState.rhodomainRCEnable = m_brcEnabled && (m_numPipe > 1);
4474 mhwHevcPicState.bSAOEnable = m_hevcSeqParams->SAO_enabled_flag ? (m_hevcSliceParams->slice_sao_luma_flag || m_hevcSliceParams->slice_sao_chroma_flag) : 0;
4475 // disable RDOQ before we get enough quality/perf data for BRC to prove its goodness
4476 //mhwHevcPicState.bHevcRdoqEnabled = m_hevcRdoqEnabled;
4477 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
4478
4479 PMOS_SURFACE brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
4480 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
4481
4482 uint32_t startBTI = 0;
4483 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE];
4484 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_FRAME_UPDATE];
4485 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4486
4487 // BRC History Buffer
4488 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4489 &surfaceCodecParams,
4490 &m_brcBuffers.resBrcHistoryBuffer,
4491 MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
4492 0,
4493 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_HISTORY_ENCODE].Value,
4494 bindingTable->dwBindingTableEntries[startBTI++],
4495 true));
4496 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4497 m_hwInterface,
4498 cmdBuffer,
4499 &surfaceCodecParams,
4500 kernelState));
4501
4502 // BRC Prev PAK statistics output buffer
4503 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4504 &surfaceCodecParams,
4505 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
4506 MOS_BYTES_TO_DWORDS(m_hevcBrcPakStatisticsSize),
4507 0,
4508 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_PAK_STATS_ENCODE].Value,
4509 bindingTable->dwBindingTableEntries[startBTI++],
4510 false));
4511 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4512 m_hwInterface,
4513 cmdBuffer,
4514 &surfaceCodecParams,
4515 kernelState));
4516
4517 // BRC HCP_PIC_STATE read
4518 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4519 &surfaceCodecParams,
4520 brcHcpStateReadBuffer,
4521 MOS_BYTES_TO_DWORDS(m_brcBuffers.dwBrcHcpPicStateSize),
4522 0,
4523 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_PIC_STATE_READ_ENCODE].Value,
4524 bindingTable->dwBindingTableEntries[startBTI++],
4525 false));
4526 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4527 m_hwInterface,
4528 cmdBuffer,
4529 &surfaceCodecParams,
4530 kernelState));
4531
4532 // BRC HCP_PIC_STATE write
4533 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4534 &surfaceCodecParams,
4535 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
4536 MOS_BYTES_TO_DWORDS(m_brcBuffers.dwBrcHcpPicStateSize),
4537 0,
4538 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_PIC_STATE_WRITE_ENCODE].Value,
4539 bindingTable->dwBindingTableEntries[startBTI++],
4540 true));
4541 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4542 m_hwInterface,
4543 cmdBuffer,
4544 &surfaceCodecParams,
4545 kernelState));
4546
4547 // Combined ENC-parameter buffer
4548 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4549 &surfaceCodecParams,
4550 &m_brcInputForEncKernelBuffer->sResource,
4551 MOS_BYTES_TO_DWORDS(HEVC_FRAMEBRC_BUF_CONST_SIZE),
4552 0,
4553 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_COMBINED_ENC_ENCODE].Value,
4554 bindingTable->dwBindingTableEntries[startBTI++],
4555 true));
4556 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4557 m_hwInterface,
4558 cmdBuffer,
4559 &surfaceCodecParams,
4560 kernelState));
4561
4562 // BRC Distortion surface
4563 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4564 &surfaceCodecParams,
4565 m_brcDistortion,
4566 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
4567 bindingTable->dwBindingTableEntries[startBTI++],
4568 0,
4569 true));
4570 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4571 m_hwInterface,
4572 cmdBuffer,
4573 &surfaceCodecParams,
4574 kernelState));
4575
4576 // BRC Data surface
4577 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4578 &surfaceCodecParams,
4579 brcConstantData,
4580 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_CONSTANT_DATA_ENCODE].Value,
4581 bindingTable->dwBindingTableEntries[startBTI++],
4582 0,
4583 false));
4584 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4585 m_hwInterface,
4586 cmdBuffer,
4587 &surfaceCodecParams,
4588 kernelState));
4589
4590 // Pixel MB Statistics surface
4591 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4592 &surfaceCodecParams,
4593 &m_resMbStatsBuffer,
4594 MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize),
4595 0,
4596 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE].Value,
4597 bindingTable->dwBindingTableEntries[startBTI++],
4598 false));
4599 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4600 m_hwInterface,
4601 cmdBuffer,
4602 &surfaceCodecParams,
4603 kernelState));
4604
4605 // Mv and Distortion summation surface
4606 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4607 &surfaceCodecParams,
4608 &m_mvAndDistortionSumSurface.sResource,
4609 MOS_BYTES_TO_DWORDS(m_mvAndDistortionSumSurface.dwSize),
4610 0,
4611 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DISTORTION_ENCODE].Value,
4612 bindingTable->dwBindingTableEntries[startBTI++],
4613 false));
4614 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4615 m_hwInterface,
4616 cmdBuffer,
4617 &surfaceCodecParams,
4618 kernelState));
4619
4620 CODECHAL_DEBUG_TOOL(
4621 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4622 &m_mvAndDistortionSumSurface.sResource,
4623 CodechalDbgAttr::attrInput,
4624 "MvDistSum",
4625 m_mvAndDistortionSumSurface.dwSize,
4626 0,
4627 CODECHAL_MEDIA_STATE_BRC_UPDATE));
4628 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4629 &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
4630 CodechalDbgAttr::attrInput,
4631 "ImgStateRead",
4632 BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
4633 0,
4634 CODECHAL_MEDIA_STATE_BRC_UPDATE));
4635
4636 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
4637 &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
4638 CodechalDbgAttr::attrInput,
4639 "ConstData",
4640 CODECHAL_MEDIA_STATE_BRC_UPDATE));
4641
4642 // PAK statistics buffer is only dumped for BrcUpdate kernel input
4643 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4644 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
4645 CodechalDbgAttr::attrInput,
4646 "PakStats",
4647 HEVC_BRC_PAK_STATISTCS_SIZE,
4648 0,
4649 CODECHAL_MEDIA_STATE_BRC_UPDATE));
4650 // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces
4651 if (m_brcDistortion) {
4652 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4653 m_debugInterface->DumpBuffer(
4654 &m_brcDistortion->OsResource,
4655 CodechalDbgAttr::attrInput,
4656 "BrcDist_BeforeFrameBrc",
4657 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
4658 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
4659 CODECHAL_MEDIA_STATE_BRC_UPDATE));
4660 }
4661
4662 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4663 &m_brcBuffers.resBrcHistoryBuffer,
4664 CodechalDbgAttr::attrInput,
4665 "HistoryRead_beforeFramBRC",
4666 m_brcHistoryBufferSize,
4667 0,
4668 CODECHAL_MEDIA_STATE_BRC_UPDATE));
4669 if (m_brcBuffers.pMbEncKernelStateInUse) {
4670 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
4671 CODECHAL_MEDIA_STATE_BRC_UPDATE,
4672 m_brcBuffers.pMbEncKernelStateInUse));
4673 } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer,
4674 CodechalDbgAttr::attrInput,
4675 "MBStatsSurf",
4676 m_hwInterface->m_avcMbStatBufferSize,
4677 0,
4678 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
4679 return eStatus;
4680 }
4681
SendBrcLcuUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)4682 MOS_STATUS CodechalEncHevcStateG11::SendBrcLcuUpdateSurfaces(
4683 PMOS_COMMAND_BUFFER cmdBuffer)
4684 {
4685 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4686
4687 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE];
4688 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_LCU_UPDATE];
4689 uint32_t startBTI = 0;
4690 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
4691
4692 if (m_brcEnabled)
4693 {
4694 // BRC History Buffer
4695 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4696 &surfaceCodecParams,
4697 &m_brcBuffers.resBrcHistoryBuffer,
4698 MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
4699 0,
4700 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_HISTORY_ENCODE].Value,
4701 bindingTable->dwBindingTableEntries[startBTI++],
4702 true));
4703 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4704 m_hwInterface,
4705 cmdBuffer,
4706 &surfaceCodecParams,
4707 kernelState));
4708
4709 // BRC Distortion surface - Intra or Inter
4710 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4711 &surfaceCodecParams,
4712 m_brcDistortion,
4713 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
4714 bindingTable->dwBindingTableEntries[startBTI++],
4715 0,
4716 true));
4717 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4718 m_hwInterface,
4719 cmdBuffer,
4720 &surfaceCodecParams,
4721 kernelState));
4722
4723 // Pixel MB Statistics surface
4724 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
4725 &surfaceCodecParams,
4726 &m_resMbStatsBuffer,
4727 MOS_BYTES_TO_DWORDS(m_hwInterface->m_avcMbStatBufferSize),
4728 0,
4729 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_MB_STATS_ENCODE].Value,
4730 bindingTable->dwBindingTableEntries[startBTI++],
4731 false));
4732 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4733 m_hwInterface,
4734 cmdBuffer,
4735 &surfaceCodecParams,
4736 kernelState));
4737 }
4738 else
4739 {
4740 // CQP ROI
4741 startBTI += 3;
4742 }
4743 // MB QP surface
4744 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4745 &surfaceCodecParams,
4746 &m_brcBuffers.sBrcMbQpBuffer,
4747 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MB_QP_CODEC].Value,
4748 bindingTable->dwBindingTableEntries[startBTI++],
4749 0,
4750 true));
4751 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4752 m_hwInterface,
4753 cmdBuffer,
4754 &surfaceCodecParams,
4755 kernelState));
4756
4757 // ROI surface
4758 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
4759 &surfaceCodecParams,
4760 &m_brcBuffers.sBrcRoiSurface,
4761 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ROI_ENCODE].Value,
4762 bindingTable->dwBindingTableEntries[startBTI++],
4763 0,
4764 false));
4765 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
4766 m_hwInterface,
4767 cmdBuffer,
4768 &surfaceCodecParams,
4769 kernelState));
4770
4771 return eStatus;
4772 }
4773
GetCustomDispatchPattern(PMHW_WALKER_PARAMS walkerParams,PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)4774 MOS_STATUS CodechalEncHevcStateG11::GetCustomDispatchPattern(
4775 PMHW_WALKER_PARAMS walkerParams,
4776 PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)
4777 {
4778 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4779
4780 CODECHAL_ENCODE_CHK_NULL_RETURN(walkerParams);
4781 CODECHAL_ENCODE_CHK_NULL_RETURN(walkerCodecParams);
4782
4783 MOS_ZeroMemory(walkerParams, sizeof(*walkerParams));
4784
4785 walkerParams->WalkerMode = (MHW_WALKER_MODE)walkerCodecParams->WalkerMode;
4786
4787 walkerParams->dwLocalLoopExecCount = 0xFFFF; //MAX VALUE
4788 walkerParams->dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE
4789
4790 // the following code is copied from the kernel ULT
4791 uint32_t maxThreadWidth, maxThreadHeight;
4792 uint32_t threadSpaceWidth, threadSpaceHeight, concurGroupNum, threadScaleV;
4793
4794 threadSpaceWidth = walkerCodecParams->dwResolutionX;
4795 threadSpaceHeight = walkerCodecParams->dwResolutionY;
4796 maxThreadWidth = threadSpaceWidth;
4797 maxThreadHeight = threadSpaceHeight;
4798 concurGroupNum = m_numberConcurrentGroup;
4799 threadScaleV = m_numberEncKernelSubThread;
4800
4801 if (concurGroupNum > 1)
4802 {
4803 if (m_degree45Needed)
4804 {
4805 maxThreadWidth = threadSpaceWidth;
4806 maxThreadHeight = threadSpaceWidth + (threadSpaceWidth + threadSpaceHeight + concurGroupNum - 2) / concurGroupNum;
4807 }
4808 else //for tu4 we ensure threadspace width and height is even or a multiple of 4
4809 {
4810 maxThreadWidth = (threadSpaceWidth + 1) & 0xfffe; //ensuring width is even
4811 maxThreadHeight = ((threadSpaceWidth + 1) >> 1) + (threadSpaceWidth + 2 * (((threadSpaceHeight + 3) & 0xfffc) - 1) + (2 * concurGroupNum - 1)) / (2 * concurGroupNum);
4812 }
4813 maxThreadHeight *= threadScaleV;
4814 maxThreadHeight += 1;
4815 }
4816 else
4817 {
4818 threadSpaceHeight *= threadScaleV;
4819 maxThreadHeight *= threadScaleV;
4820 }
4821
4822 uint32_t localLoopExecCount = m_degree45Needed ? (2 * m_numWavefrontInOneRegion + 1):m_numWavefrontInOneRegion;
4823
4824 eStatus = InitMediaObjectWalker(maxThreadWidth,
4825 maxThreadHeight,
4826 concurGroupNum - 1,
4827 m_swScoreboardState->GetDependencyPattern(),
4828 m_numberEncKernelSubThread - 1,
4829 localLoopExecCount,
4830 *walkerParams);
4831
4832 return eStatus;
4833 }
4834
GenerateLcuLevelData(MOS_SURFACE & lcuLevelInputDataSurfaceParam)4835 MOS_STATUS CodechalEncHevcStateG11::GenerateLcuLevelData(MOS_SURFACE &lcuLevelInputDataSurfaceParam)
4836 {
4837 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4838
4839 CODECHAL_ENCODE_FUNCTION_ENTER;
4840
4841 CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
4842
4843 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
4844 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
4845
4846 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
4847 uint32_t residual = (1 << shift) - 1;
4848
4849 uint32_t frameWidthInLcu = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
4850 uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
4851
4852 PLCU_LEVEL_DATA* lcuInfo = (PLCU_LEVEL_DATA*)MOS_AllocMemory(sizeof(PLCU_LEVEL_DATA) * frameWidthInLcu);
4853 CODECHAL_ENCODE_CHK_NULL_RETURN(lcuInfo);
4854 for (uint32_t i = 0; i < frameWidthInLcu; i++)
4855 {
4856 lcuInfo[i] = (PLCU_LEVEL_DATA)MOS_AllocMemory(sizeof(LCU_LEVEL_DATA) * frameHeightInLcu);
4857 if (lcuInfo[i] == nullptr)
4858 {
4859 for (uint32_t j = 0; j < i; j++)
4860 {
4861 MOS_FreeMemory(lcuInfo[j]);
4862 }
4863 MOS_FreeMemory(lcuInfo);
4864 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
4865 }
4866 MOS_ZeroMemory(lcuInfo[i], (sizeof(LCU_LEVEL_DATA) * frameHeightInLcu));
4867 }
4868
4869 // Tiling case
4870 if (numTileColumns > 1 || numTileRows > 1)
4871 {
4872 // This assumes that the entire Slice is contained within a Tile
4873 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
4874 {
4875 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
4876 {
4877 uint32_t tileId = tileRow * numTileColumns + tileCol;
4878 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile = m_tileParams[tileId];
4879
4880 uint32_t tileColumnWidth = (currentTile.TileWidthInMinCbMinus1 + 1 + residual) >> shift;
4881 uint32_t tileRowHeight = (currentTile.TileHeightInMinCbMinus1 + 1 + residual) >> shift;
4882
4883 for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
4884 {
4885 bool lastSliceInTile = false, sliceInTile = false;
4886
4887 eStatus = (MOS_STATUS) IsSliceInTile(slcCount,
4888 ¤tTile,
4889 &sliceInTile,
4890 &lastSliceInTile);
4891 if (eStatus != MOS_STATUS_SUCCESS)
4892 {
4893 for (uint32_t i = 0; i < frameWidthInLcu; i++)
4894 {
4895 MOS_FreeMemory(lcuInfo[i]);
4896 }
4897 MOS_FreeMemory(lcuInfo);
4898 CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
4899 }
4900
4901 if (!sliceInTile)
4902 {
4903 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4904 continue;
4905 }
4906
4907 sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address;
4908 uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
4909 uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
4910
4911 for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
4912 {
4913 lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU;
4914 lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index
4915 lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount;
4916 lcuInfo[sliceLcuX][sliceLcuY].TileId = (uint16_t)tileId;
4917 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = (uint16_t)currentTile.TileStartLCUX;
4918 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = (uint16_t)currentTile.TileStartLCUY;
4919 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)(currentTile.TileStartLCUX + tileColumnWidth);
4920 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)(currentTile.TileStartLCUY + tileRowHeight);
4921
4922 sliceLcuX++;
4923
4924 if (sliceLcuX >= currentTile.TileStartLCUX + tileColumnWidth)
4925 {
4926 sliceLcuX = currentTile.TileStartLCUX;
4927 sliceLcuY++;
4928 }
4929 }
4930 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4931 }
4932 }
4933 }
4934 }
4935 else // non-tiling case
4936 {
4937 for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
4938 {
4939 sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address;
4940 uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu;
4941 uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu;
4942
4943 for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++)
4944 {
4945 lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU;
4946 lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index
4947 lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount;
4948 lcuInfo[sliceLcuX][sliceLcuY].TileId = 0;
4949 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = 0;
4950 lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = 0;
4951 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)frameWidthInLcu;
4952 lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)frameHeightInLcu;
4953
4954 sliceLcuX++;
4955
4956 if (sliceLcuX >= frameWidthInLcu)
4957 {
4958 sliceLcuX = 0;
4959 sliceLcuY++;
4960 }
4961 }
4962 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4963 }
4964 }
4965
4966 // Write LCU Info to the surface
4967 if (!Mos_ResourceIsNull(&lcuLevelInputDataSurfaceParam.OsResource))
4968 {
4969 MOS_LOCK_PARAMS lockFlags;
4970 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4971 lockFlags.WriteOnly = 1;
4972 PLCU_LEVEL_DATA lcuLevelData = (PLCU_LEVEL_DATA)m_osInterface->pfnLockResource(
4973 m_osInterface,
4974 &lcuLevelInputDataSurfaceParam.OsResource,
4975 &lockFlags);
4976 if (lcuLevelData == nullptr)
4977 {
4978 for (uint32_t i = 0; i < frameWidthInLcu; i++)
4979 {
4980 MOS_FreeMemory(lcuInfo[i]);
4981 }
4982 MOS_FreeMemory(lcuInfo);
4983 CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
4984 }
4985
4986 uint8_t* dataRowStart = (uint8_t*)lcuLevelData;
4987
4988 for (uint32_t sliceLcuY = 0; sliceLcuY < frameHeightInLcu; sliceLcuY++)
4989 {
4990 for (uint32_t sliceLcuX = 0; sliceLcuX < frameWidthInLcu; sliceLcuX++)
4991 {
4992 *(lcuLevelData) = lcuInfo[sliceLcuX][sliceLcuY];
4993
4994 if ((sliceLcuX + 1) == frameWidthInLcu)
4995 {
4996 dataRowStart += lcuLevelInputDataSurfaceParam.dwPitch;
4997 lcuLevelData = (PLCU_LEVEL_DATA)dataRowStart;
4998 }
4999 else
5000 {
5001 lcuLevelData++;
5002 }
5003 }
5004 }
5005
5006 m_osInterface->pfnUnlockResource(
5007 m_osInterface,
5008 &lcuLevelInputDataSurfaceParam.OsResource);
5009 }
5010 else
5011 {
5012 eStatus = MOS_STATUS_NULL_POINTER;
5013 CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
5014 }
5015
5016 // Freeing the temporarily allocated memory
5017 if (lcuInfo)
5018 {
5019 for (uint32_t i = 0; i < frameWidthInLcu; i++)
5020 {
5021 MOS_FreeMemory(lcuInfo[i]);
5022 }
5023 MOS_FreeMemory(lcuInfo);
5024 }
5025 return eStatus;
5026 }
5027
GenerateConcurrentThreadGroupData()5028 MOS_STATUS CodechalEncHevcStateG11::GenerateConcurrentThreadGroupData()
5029 {
5030 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5031 uint32_t curIdx = m_currRecycledBufIdx;
5032
5033 CODECHAL_ENCODE_FUNCTION_ENTER;
5034
5035 if (!Mos_ResourceIsNull(&m_encBCombinedBuffer1[curIdx].sResource))
5036 {
5037 MOS_LOCK_PARAMS lockFlags;
5038 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5039 lockFlags.WriteOnly = 1;
5040 auto *buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
5041 m_osInterface,
5042 &m_encBCombinedBuffer1[curIdx].sResource,
5043 &lockFlags);
5044 CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
5045
5046 MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
5047
5048 auto concurrentTgData = (PCONCURRENT_THREAD_GROUP_DATA)&buf->concurrent.item[0];
5049
5050 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
5051 uint32_t residual = (1 << shift) - 1;
5052
5053 uint32_t frameWidthInLcu = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
5054 uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
5055
5056 uint32_t slcCount = 0;
5057 // Currently only using one thread group for each slice. Extend it to multiple soon.
5058 for (uint32_t startLCU = 0; slcCount < m_numSlices; slcCount++, startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice)
5059 {
5060 uint32_t sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address;
5061 uint32_t sliceStartLcuX = sliceStartLcu % frameWidthInLcu;
5062 uint32_t sliceStartLcuY = sliceStartLcu / frameWidthInLcu;
5063
5064 uint32_t sliceEnd = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index
5065 uint32_t sliceEndLcuX = sliceStartLcu % frameWidthInLcu;
5066 uint32_t sliceEndLcuY = sliceStartLcu / frameWidthInLcu;
5067
5068 concurrentTgData->CurrSliceStartLcuX = (uint16_t)sliceStartLcuX;
5069 concurrentTgData->CurrSliceStartLcuY = (uint16_t)sliceStartLcuY;
5070
5071 concurrentTgData->CurrSliceEndLcuX = (uint16_t)sliceEndLcuX;
5072 concurrentTgData->CurrSliceEndLcuY = (uint16_t)sliceEndLcuY;
5073
5074 concurrentTgData->CurrTgStartLcuX = (uint16_t)sliceStartLcuX;
5075 concurrentTgData->CurrTgStartLcuY = (uint16_t)sliceStartLcuY;
5076
5077 concurrentTgData->CurrTgEndLcuX = (uint16_t)sliceEndLcuX;
5078 concurrentTgData->CurrTgEndLcuY = (uint16_t)sliceEndLcuY;
5079 }
5080
5081 m_osInterface->pfnUnlockResource(
5082 m_osInterface,
5083 &m_encBCombinedBuffer1[curIdx].sResource);
5084 }
5085 else
5086 {
5087 CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
5088 return MOS_STATUS_NULL_POINTER;
5089 }
5090
5091 return eStatus;
5092 }
5093
EncodeMbEncKernel(CODECHAL_MEDIA_STATE_TYPE encFunctionType)5094 MOS_STATUS CodechalEncHevcStateG11::EncodeMbEncKernel(
5095 CODECHAL_MEDIA_STATE_TYPE encFunctionType)
5096 {
5097 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5098
5099 PerfTagSetting perfTag;
5100 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
5101
5102 // Initialize DSH kernel state
5103 PMHW_KERNEL_STATE kernelState;
5104 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5105 CODECHAL_WALKER_DEGREE walkerDegree;
5106 MHW_WALKER_PARAMS walkerParams;
5107 uint32_t walkerResolutionX, walkerResolutionY;
5108 uint16_t totalThreadNumPerLcu = 1;
5109
5110 if (m_hevcPicParams->CodingType == I_TYPE)
5111 {
5112 encFunctionType = CODECHAL_MEDIA_STATE_HEVC_I_MBENC;
5113 }
5114 else
5115 {
5116 encFunctionType = m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
5117 }
5118
5119 if (m_isMaxLcu64)
5120 {
5121 kernelState = &m_mbEncKernelStates[MBENC_LCU64_KRNIDX];
5122 if (m_hevcSeqParams->TargetUsage == 1)
5123 {
5124 walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
5125 walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
5126 }
5127 else
5128 {
5129 walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6);
5130 walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6);
5131 }
5132 }
5133 else
5134 {
5135 kernelState = &m_mbEncKernelStates[MBENC_LCU32_KRNIDX];
5136 walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
5137 walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
5138 }
5139
5140 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5141 walkerCodecParams.WalkerMode = m_walkerMode;
5142 walkerCodecParams.dwResolutionX = walkerResolutionX;
5143 walkerCodecParams.dwResolutionY = walkerResolutionY;
5144 walkerCodecParams.dwNumSlices = m_numSlices;
5145 walkerCodecParams.usTotalThreadNumPerLcu = totalThreadNumPerLcu;
5146
5147 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCustomDispatchPattern(&walkerParams, &walkerCodecParams));
5148
5149 // If Single Task Phase is not enabled, use BT count for the kernel state.
5150 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5151 {
5152 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5153 m_maxBtCount : kernelState->KernelParams.iBTCount;
5154 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5155 m_stateHeapInterface,
5156 maxBtCount));
5157 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5158 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5159 }
5160
5161 // Set up the DSH/SSH as normal
5162 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5163 m_stateHeapInterface,
5164 kernelState,
5165 false,
5166 0,
5167 false,
5168 m_storeData));
5169
5170 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5171 MOS_ZeroMemory(&idParams, sizeof(idParams));
5172 idParams.pKernelState = kernelState;
5173 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5174 m_stateHeapInterface,
5175 1,
5176 &idParams));
5177
5178 // Generate Lcu Level Data
5179 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx]));
5180
5181 // Generate Concurrent Thread Group Data
5182 if(m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26Degree ||
5183 m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26ZDegree ||
5184 m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDegree ||
5185 m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDDegree)
5186 {
5187 // Generate Concurrent Thread Group Data
5188 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData());
5189 }
5190 else
5191 {
5192 // For 45D walking patter, kernel generates the concurrent thread group by itself. No need for driver to generate.
5193 }
5194
5195 // setup curbe
5196 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncKernel());
5197
5198 CODECHAL_DEBUG_TOOL(
5199 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5200 encFunctionType,
5201 MHW_DSH_TYPE,
5202 kernelState));
5203
5204 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5205 encFunctionType,
5206 kernelState));
5207 //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHEVCMbEncCurbeG11(
5208 // m_debugInterface,
5209 // encFunctionType,
5210 // &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource)); //to do
5211
5212 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5213 encFunctionType,
5214 MHW_ISH_TYPE,
5215 kernelState));
5216 )
5217
5218 MOS_COMMAND_BUFFER cmdBuffer;
5219 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5220
5221 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5222 sendKernelCmdsParams.EncFunctionType = encFunctionType;
5223 sendKernelCmdsParams.pKernelState = kernelState;
5224 // TO DO : Remove scoreboard from VFE STATE Command
5225 sendKernelCmdsParams.bEnableCustomScoreBoard = false;
5226 sendKernelCmdsParams.pCustomScoreBoard = nullptr;
5227 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5228
5229 // Add binding table
5230 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5231 m_stateHeapInterface,
5232 kernelState));
5233
5234 // send surfaces
5235 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesKernel(&cmdBuffer));
5236
5237 CODECHAL_DEBUG_TOOL(
5238 if (m_pictureCodingType == I_TYPE)
5239 {
5240 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5241 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
5242 CodechalDbgAttr::attrOutput,
5243 "HEVC_I_MBENC_LcuLevelData_In",
5244 CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
5245 }
5246 else
5247 {
5248 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5249 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
5250 CodechalDbgAttr::attrOutput,
5251 "HEVC_B_MBENC_LcuLevelData_In",
5252 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
5253 }
5254 )
5255
5256 if ((encFunctionType == CODECHAL_MEDIA_STATE_HEVC_B_MBENC) || (encFunctionType == CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC))
5257 {
5258 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5259 &m_encConstantTableForB.sResource,
5260 CodechalDbgAttr::attrOutput,
5261 "HEVC_B_MBENC_ConstantData_In",
5262 m_encConstantTableForB.dwSize,
5263 0,
5264 encFunctionType)));
5265 }
5266
5267 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
5268 &cmdBuffer,
5269 &walkerParams));
5270
5271 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5272
5273 // Add dump for MBEnc surface state heap here
5274 CODECHAL_DEBUG_TOOL(
5275 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5276 encFunctionType,
5277 MHW_SSH_TYPE,
5278 kernelState));
5279 )
5280
5281 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5282 m_stateHeapInterface,
5283 kernelState));
5284
5285 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5286 {
5287 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5288 m_stateHeapInterface));
5289 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5290 &cmdBuffer,
5291 nullptr));
5292 }
5293
5294 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5295 &cmdBuffer,
5296 encFunctionType,
5297 nullptr)));
5298
5299 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5300
5301 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5302
5303 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5304 {
5305 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5306 m_lastTaskInPhase = false;
5307 }
5308
5309 CODECHAL_DEBUG_TOOL(
5310 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5311 &m_debugSurface[0].sResource,
5312 CodechalDbgAttr::attrOutput,
5313 "DebugDataSurface_Out0",
5314 m_debugSurface[0].dwSize,
5315 0,
5316 encFunctionType));
5317 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5318 &m_debugSurface[1].sResource,
5319 CodechalDbgAttr::attrOutput,
5320 "DebugDataSurface_Out1",
5321 m_debugSurface[1].dwSize,
5322 0,
5323 encFunctionType));
5324 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5325 &m_debugSurface[2].sResource,
5326 CodechalDbgAttr::attrOutput,
5327 "DebugDataSurface_Out2",
5328 m_debugSurface[2].dwSize,
5329 0,
5330 encFunctionType));
5331 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5332 &m_debugSurface[3].sResource,
5333 CodechalDbgAttr::attrOutput,
5334 "DebugDataSurface_Out3",
5335 m_debugSurface[3].dwSize,
5336 0,
5337 encFunctionType));
5338 );
5339
5340 #if 0 // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them.
5341 {
5342 CODECHAL_DEBUG_TOOL(
5343 CODEC_REF_LIST currRefList;
5344
5345 m_currRefList = (m_refList[m_currReconstructedPic.FrameIdx]);
5346 m_currRefList->RefPic = m_currOriginalPic;
5347
5348 m_debugInterface->m_currPic = m_currOriginalPic;
5349 m_debugInterface->m_bufferDumpFrameNum = m_storeData;
5350 m_debugInterface->m_frameType = m_pictureCodingType;
5351
5352 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5353 &m_currRefList->resRefMbCodeBuffer,
5354 CodechalDbgAttr::attrOutput,
5355 "MbCode",
5356 m_picWidthInMb * m_frameFieldHeightInMb * 64,
5357 CodecHal_PictureIsBottomField(m_currRefList->RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
5358 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
5359 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
5360
5361 if (m_mvDataSize)
5362 {
5363 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5364 &currRefList.resRefMvDataBuffer,
5365 CodechalDbgAttr::attrOutput,
5366 "MbData",
5367 m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
5368 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
5369 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
5370 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
5371 }
5372
5373 if (CodecHalIsFeiEncode(m_codecFunction))
5374 {
5375 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5376 &m_resDistortionBuffer,
5377 CodechalDbgAttr::attrOutput,
5378 "DistortionSurf",
5379 m_picWidthInMb * m_frameFieldHeightInMb * 48,
5380 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
5381 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
5382 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
5383 }
5384
5385 )
5386
5387 CODECHAL_DEBUG_TOOL(
5388 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
5389 this,
5390 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
5391 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
5392 (const char*)"_Hevc_CombinedBuffer2",
5393 false));
5394 );
5395
5396 // Dump SW scoreboard surface - Output of MBENC
5397 CODECHAL_DEBUG_TOOL(
5398 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHevcEncodeSwScoreboardSurface(
5399 m_debugInterface,
5400 m_swScoreboardState->GetCurSwScoreboardSurface(), false));
5401 );
5402
5403 CODECHAL_DEBUG_TOOL(
5404 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
5405 this,
5406 &m_encConstantTableForB.sResource,
5407 m_encConstantTableForB.dwSize,
5408 (const char*)"_Hevc_EncConstantTable",
5409 true));
5410 );
5411
5412 CODECHAL_DEBUG_TOOL(
5413 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
5414 this,
5415 &m_debugSurface[0].sResource,
5416 m_debugSurface[0].dwSize,
5417 (const char*)"_Hevc_DebugDump0",
5418 true));
5419 );
5420
5421 CODECHAL_DEBUG_TOOL(
5422 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
5423 this,
5424 &m_debugSurface[1].sResource,
5425 m_debugSurface[1].dwSize,
5426 (const char*)"_Hevc_DebugDump1",
5427 true));
5428 );
5429
5430 CODECHAL_DEBUG_TOOL(
5431 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
5432 this,
5433 &m_debugSurface[2].sResource,
5434 m_debugSurface[2].dwSize,
5435 (const char*)"_Hevc_DebugDump2",
5436 true));
5437 );
5438
5439 CODECHAL_DEBUG_TOOL(
5440 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer(
5441 this,
5442 &m_debugSurface[3].sResource,
5443 m_debugSurface[3].dwSize,
5444 (const char*)"_Hevc_DebugDump3",
5445 true));
5446 );
5447
5448 CODECHAL_DEBUG_TOOL(
5449 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
5450 &m_currPicWithReconBoundaryPix,
5451 CodechalDbgAttr::attrReconstructedSurface,
5452 "ReconSurf")));
5453 }
5454 #endif
5455
5456 return eStatus;
5457 }
5458
EncodeBrcInitResetKernel()5459 MOS_STATUS CodechalEncHevcStateG11::EncodeBrcInitResetKernel()
5460 {
5461 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5462
5463 CODECHAL_ENCODE_FUNCTION_ENTER;
5464
5465 CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);
5466
5467 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET;
5468
5469 // Initialize DSH kernel state
5470 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
5471
5472 // If Single Task Phase is not enabled, use BT count for the kernel state.
5473 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5474 {
5475 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5476 m_maxBtCount : kernelState->KernelParams.iBTCount;
5477 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5478 m_stateHeapInterface,
5479 maxBtCount));
5480 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5481 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5482 }
5483
5484 // Set up the DSH/SSH as normal
5485 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5486 m_stateHeapInterface,
5487 kernelState,
5488 false,
5489 0,
5490 false,
5491 m_storeData));
5492
5493 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5494 MOS_ZeroMemory(&idParams, sizeof(idParams));
5495 idParams.pKernelState = kernelState;
5496 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5497 m_stateHeapInterface,
5498 1,
5499 &idParams));
5500
5501 // Setup curbe for BrcInitReset kernel
5502 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcInitReset(
5503 brcKrnIdx));
5504
5505 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;
5506 CODECHAL_DEBUG_TOOL(
5507 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5508 encFunctionType,
5509 MHW_DSH_TYPE,
5510 kernelState));
5511 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5512 encFunctionType,
5513 kernelState));
5514 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5515 encFunctionType,
5516 MHW_ISH_TYPE,
5517 kernelState));
5518 )
5519
5520 MOS_COMMAND_BUFFER cmdBuffer;
5521 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5522
5523 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5524 sendKernelCmdsParams.EncFunctionType = encFunctionType;
5525 sendKernelCmdsParams.pKernelState = kernelState;
5526 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5527
5528 // Add binding table
5529 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5530 m_stateHeapInterface,
5531 kernelState));
5532
5533 // Send surfaces for BrcInitReset Kernel
5534 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcInitResetSurfaces(&cmdBuffer, brcKrnIdx));
5535
5536 MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
5537 MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
5538
5539 MediaObjectInlineData mediaObjectInlineData;
5540 MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
5541 mediaObjectParams.pInlineData = &mediaObjectInlineData;
5542 mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
5543 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
5544 &cmdBuffer,
5545 nullptr,
5546 &mediaObjectParams));
5547
5548 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5549
5550 // Add dump for BrcInitReset surface state heap here
5551 CODECHAL_DEBUG_TOOL(
5552 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5553 encFunctionType,
5554 MHW_SSH_TYPE,
5555 kernelState));
5556 )
5557
5558 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5559 m_stateHeapInterface,
5560 kernelState));
5561 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5562 {
5563 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5564 m_stateHeapInterface));
5565 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5566 &cmdBuffer,
5567 nullptr));
5568 }
5569
5570 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5571 &cmdBuffer,
5572 encFunctionType,
5573 nullptr)));
5574
5575 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5576
5577 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5578
5579 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5580 {
5581 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5582 m_lastTaskInPhase = false;
5583 }
5584
5585 return eStatus;
5586 }
5587
EncodeBrcFrameUpdateKernel()5588 MOS_STATUS CodechalEncHevcStateG11::EncodeBrcFrameUpdateKernel()
5589 {
5590 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5591
5592 CODECHAL_ENCODE_FUNCTION_ENTER;
5593
5594 PerfTagSetting perfTag;
5595 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
5596
5597 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
5598
5599 // Initialize DSH kernel state
5600 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
5601
5602 // If Single Task Phase is not enabled, use BT count for the kernel state.
5603 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5604 {
5605 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5606 m_maxBtCount : kernelState->KernelParams.iBTCount;
5607 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5608 m_stateHeapInterface,
5609 maxBtCount));
5610 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5611 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5612 }
5613
5614 // Set up the DSH/SSH as normal
5615 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5616 m_stateHeapInterface,
5617 kernelState,
5618 false,
5619 0,
5620 false,
5621 m_storeData));
5622
5623 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5624 MOS_ZeroMemory(&idParams, sizeof(idParams));
5625 idParams.pKernelState = kernelState;
5626 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5627 m_stateHeapInterface,
5628 1,
5629 &idParams));
5630
5631 // Setup curbe for BrcFrameUpdate kernel
5632 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
5633 brcKrnIdx));
5634
5635 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
5636 CODECHAL_DEBUG_TOOL(
5637 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5638 encFunctionType,
5639 MHW_DSH_TYPE,
5640 kernelState));
5641 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5642 encFunctionType,
5643 kernelState));
5644 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5645 encFunctionType,
5646 MHW_ISH_TYPE,
5647 kernelState));
5648 )
5649
5650 MOS_COMMAND_BUFFER cmdBuffer;
5651 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5652
5653 SendKernelCmdsParams sendKernelCmdsParams;
5654 sendKernelCmdsParams = SendKernelCmdsParams();
5655 sendKernelCmdsParams.EncFunctionType = encFunctionType;
5656 sendKernelCmdsParams.pKernelState = kernelState;
5657 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5658
5659 // Add binding table
5660 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5661 m_stateHeapInterface,
5662 kernelState));
5663
5664 // Send surfaces for BrcFrameUpdate Kernel
5665 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcFrameUpdateSurfaces(&cmdBuffer));
5666
5667 MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
5668 MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
5669
5670 MediaObjectInlineData mediaObjectInlineData;
5671 MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
5672 mediaObjectParams.pInlineData = &mediaObjectInlineData;
5673 mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
5674 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject(
5675 &cmdBuffer,
5676 nullptr,
5677 &mediaObjectParams));
5678
5679 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5680
5681 // Add dump for BrcFrameUpdate surface state heap here
5682 CODECHAL_DEBUG_TOOL(
5683 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5684 encFunctionType,
5685 MHW_SSH_TYPE,
5686 kernelState));
5687 )
5688
5689 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5690 m_stateHeapInterface,
5691 kernelState));
5692 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5693 {
5694 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5695 m_stateHeapInterface));
5696 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5697 &cmdBuffer,
5698 nullptr));
5699 }
5700
5701 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5702 &cmdBuffer,
5703 encFunctionType,
5704 nullptr)));
5705
5706 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5707
5708 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5709
5710 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5711 {
5712 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5713 m_lastTaskInPhase = false;
5714 }
5715
5716 return eStatus;
5717 }
5718
EncodeBrcLcuUpdateKernel()5719 MOS_STATUS CodechalEncHevcStateG11::EncodeBrcLcuUpdateKernel()
5720 {
5721 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5722
5723 CODECHAL_ENCODE_FUNCTION_ENTER;
5724
5725 PerfTagSetting perfTag;
5726 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);
5727
5728 CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE;
5729
5730 // Initialize DSH kernel state
5731 PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
5732
5733 // If Single Task Phase is not enabled, use BT count for the kernel state.
5734 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
5735 {
5736 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5737 m_maxBtCount : kernelState->KernelParams.iBTCount;
5738 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5739 m_stateHeapInterface,
5740 maxBtCount));
5741 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5742 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5743 }
5744
5745 // Set up the DSH/SSH as normal
5746 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5747 m_stateHeapInterface,
5748 kernelState,
5749 false,
5750 0,
5751 false,
5752 m_storeData));
5753
5754 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5755 MOS_ZeroMemory(&idParams, sizeof(idParams));
5756 idParams.pKernelState = kernelState;
5757 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5758 m_stateHeapInterface,
5759 1,
5760 &idParams));
5761
5762 // Setup curbe for BrcFrameUpdate kernel
5763 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(
5764 brcKrnIdx));
5765
5766 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_MB_BRC_UPDATE;
5767
5768 CODECHAL_DEBUG_TOOL(
5769 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5770 encFunctionType,
5771 MHW_DSH_TYPE,
5772 kernelState));
5773 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5774 encFunctionType,
5775 kernelState));
5776 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5777 encFunctionType,
5778 MHW_ISH_TYPE,
5779 kernelState));
5780 )
5781
5782 MOS_COMMAND_BUFFER cmdBuffer;
5783 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5784
5785 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5786 sendKernelCmdsParams.EncFunctionType = encFunctionType;
5787 sendKernelCmdsParams.pKernelState = kernelState;
5788 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5789
5790 // Add binding table
5791 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5792 m_stateHeapInterface,
5793 kernelState));
5794
5795 if (m_hevcPicParams->NumROI)
5796 {
5797 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROISurface());
5798 }
5799
5800 // Send surfaces for BrcFrameUpdate Kernel
5801 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcLcuUpdateSurfaces(&cmdBuffer));
5802
5803 // Program Media walker
5804 uint32_t resolutionX, resolutionY;
5805 resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
5806 resolutionX = MOS_ROUNDUP_SHIFT(resolutionX, 4);
5807 resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight);
5808 resolutionY = MOS_ROUNDUP_SHIFT(resolutionY, 3);
5809 CODECHAL_ENCODE_ASSERTMESSAGE("LucBRC thread space = %d x %d", resolutionX, resolutionY);
5810
5811 MHW_WALKER_PARAMS walkerParams;
5812 MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
5813
5814 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5815 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5816 walkerCodecParams.WalkerMode = m_walkerMode;
5817 walkerCodecParams.dwResolutionX = resolutionX;
5818 walkerCodecParams.dwResolutionY = resolutionY;
5819 walkerCodecParams.bNoDependency = true;
5820 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
5821 walkerCodecParams.ucGroupId = m_groupId;
5822 walkerCodecParams.wPictureCodingType = m_pictureCodingType;
5823 walkerCodecParams.bUseScoreboard = false;
5824
5825 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
5826 m_hwInterface,
5827 &walkerParams,
5828 &walkerCodecParams));
5829
5830 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd(
5831 &cmdBuffer,
5832 &walkerParams));
5833
5834 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5835
5836 // Add dump for BrcFrameUpdate surface state heap here
5837 CODECHAL_DEBUG_TOOL(
5838 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5839 encFunctionType,
5840 MHW_SSH_TYPE,
5841 kernelState));
5842 )
5843
5844 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
5845 m_stateHeapInterface,
5846 kernelState));
5847 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5848 {
5849 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5850 m_stateHeapInterface));
5851 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(
5852 &cmdBuffer,
5853 nullptr));
5854 }
5855
5856 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5857 &cmdBuffer,
5858 encFunctionType,
5859 nullptr)));
5860
5861 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
5862
5863 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5864
5865 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5866 {
5867 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5868 m_lastTaskInPhase = false;
5869 }
5870
5871 return eStatus;
5872 }
5873
EncodeKernelFunctions()5874 MOS_STATUS CodechalEncHevcStateG11::EncodeKernelFunctions()
5875 {
5876 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5877 typedef void (* pAppCallBack)();
5878
5879 if (m_pakOnlyTest)
5880 {
5881 // Skip ENC when PAK only mode is enabled
5882 return eStatus;
5883 }
5884
5885 if (m_pictureCodingType == P_TYPE)
5886 {
5887 m_lowDelay = true;
5888 }
5889
5890 if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
5891 {
5892 m_currRefSync = &m_refSync[m_currMbCodeIdx];
5893
5894 // Check if the signal obj has been used before
5895 if (!m_hevcSeqParams->ParallelBRC && (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed))
5896 {
5897 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
5898 syncParams.GpuContext = m_renderContext;
5899 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
5900 syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount;
5901
5902 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
5903 m_currRefSync->uiSemaphoreObjCount = 0;
5904 m_currRefSync->bInUsed = false;
5905 }
5906 }
5907 else
5908 {
5909 m_currRefSync = nullptr;
5910 }
5911
5912 //Reset to use a different performance tag ID
5913 m_osInterface->pfnResetPerfBufferID(m_osInterface);
5914
5915 m_firstTaskInPhase = true;
5916 m_lastTaskInPhase = false;
5917
5918 m_brcInputForEncKernelBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx];
5919
5920 // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
5921 // BRC init is called once even for CQP mode when ROI is enabled, hence also checking for first frame flag
5922 if ((m_brcEnabled && (m_brcInit || m_brcReset)) || (m_firstFrame && m_hevcPicParams->NumROI))
5923 {
5924 m_firstTaskInPhase = m_lastTaskInPhase = true;
5925 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcInitResetKernel());
5926 m_brcInit = m_brcReset = false;
5927 }
5928
5929 m_firstTaskInPhase = true;
5930 m_lastTaskInPhase = false;
5931
5932 CodechalEncodeSwScoreboard::KernelParams swScoreboardKernelParames;
5933 MOS_ZeroMemory(&swScoreboardKernelParames, sizeof(swScoreboardKernelParames));
5934 // SW scoreboard Kernel Call -- to be continued - DS + HME kernel call
5935 swScoreboardKernelParames.isHevc = false; // can be set to false. Need to enabled only for an optimization which is not needed for now
5936
5937 m_degree45Needed = true;
5938 if (m_hevcSeqParams->TargetUsage == 1)
5939 {
5940 m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU1, m_numberConcurrentGroup);
5941 // m_numberConcurrentGroup should default to 2 here for TU1. the only other value allowed from reg key will be 1
5942 m_degree45Needed = false;
5943 }
5944
5945 DecideConcurrentGroupAndWaveFrontNumber();
5946
5947 DependencyPattern walkPattern;
5948 if (m_hevcSeqParams->TargetUsage == 1)
5949 {
5950 if (m_isMaxLcu64)
5951 {
5952 walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26XDegreeAlt:dependencyWavefront26XDDegree;
5953 }
5954 else
5955 {
5956 walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26Degree:dependencyWavefront26DDegree;
5957 }
5958 }
5959 else if (m_hevcSeqParams->TargetUsage == 4)
5960 {
5961 walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront45Degree:dependencyWavefront45DDegree;
5962 }
5963 else
5964 {
5965 walkPattern = dependencyWavefront45DDegree;
5966 }
5967 m_swScoreboardState->SetDependencyPattern(walkPattern);
5968
5969 if (m_isMaxLcu64)
5970 {
5971 if (m_hevcSeqParams->TargetUsage == 1)
5972 {
5973 swScoreboardKernelParames.scoreboardWidth = (m_widthAlignedMaxLcu >> 6);
5974 swScoreboardKernelParames.scoreboardHeight = (m_heightAlignedMaxLcu >> 6) * m_numberEncKernelSubThread;
5975 }
5976 else
5977 {
5978 swScoreboardKernelParames.scoreboardWidth = 2*(m_widthAlignedMaxLcu >> 6);
5979 swScoreboardKernelParames.scoreboardHeight = 2*(m_heightAlignedMaxLcu >> 6);
5980 }
5981 swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
5982 swScoreboardKernelParames.numberOfChildThread = m_numberEncKernelSubThread - 1; // child thread number is minus one of the total sub-thread for the main thread takes one.
5983
5984 }
5985 else
5986 {
5987 swScoreboardKernelParames.scoreboardWidth = 4*(m_widthAlignedLcu32 >> 5);
5988 swScoreboardKernelParames.scoreboardHeight = m_heightAlignedLcu32 >> 5;
5989 swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup;
5990 swScoreboardKernelParames.numberOfChildThread = 0;
5991 }
5992 swScoreboardKernelParames.swScoreboardSurfaceWidth = swScoreboardKernelParames.scoreboardWidth;
5993 swScoreboardKernelParames.swScoreboardSurfaceHeight = swScoreboardKernelParames.scoreboardHeight;
5994
5995 m_swScoreboardState->SetCurSwScoreboardSurfaceIndex(m_currRecycledBufIdx);
5996
5997 swScoreboardKernelParames.lcuInfoSurface = &m_lcuLevelInputDataSurface[m_currRecycledBufIdx];
5998
5999 if(m_useSwInitScoreboard)
6000 {
6001 SetupSwScoreBoard(&swScoreboardKernelParames);
6002 }
6003 else
6004 {
6005 // Call SW scoreboard Init kernel used by MBEnc kernel
6006 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->Execute(&swScoreboardKernelParames));
6007 }
6008
6009 // Dump SW scoreboard surface - Output of SW scoreboard Init Kernel and Input to MBENC
6010 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6011 m_swScoreboardState->GetCurSwScoreboardSurface(),
6012 CodechalDbgAttr::attrInput,
6013 "InitSWScoreboard_In",
6014 CODECHAL_MEDIA_STATE_SW_SCOREBOARD_INIT)));
6015
6016 // Csc, Downscaling, and/or 10-bit to 8-bit conversion
6017 CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState);
6018
6019 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
6020 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
6021 cscScalingKernelParams.bLastTaskInPhaseCSC =
6022 cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled);
6023 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled);
6024 cscScalingKernelParams.bLastTaskInPhase32xDS = !(m_hmeEnabled || m_brcEnabled);
6025
6026 CodechalEncodeCscDsG11::HevcExtKernelParams hevcExtCscParams;
6027 MOS_ZeroMemory(&hevcExtCscParams, sizeof(hevcExtCscParams));
6028
6029 if (m_isMaxLcu64)
6030 {
6031 hevcExtCscParams.bHevcEncHistorySum = true;
6032 hevcExtCscParams.bUseLCU32 = false;
6033 hevcExtCscParams.presHistoryBuffer = &m_encBCombinedBuffer2[m_lastRecycledBufIdx].sResource;
6034 hevcExtCscParams.dwSizeHistoryBuffer = m_historyOutBufferSize;
6035 hevcExtCscParams.dwOffsetHistoryBuffer = m_historyOutBufferOffset;
6036 hevcExtCscParams.presHistorySumBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6037 hevcExtCscParams.dwSizeHistorySumBuffer = sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer);
6038 hevcExtCscParams.dwOffsetHistorySumBuffer = sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer);
6039 hevcExtCscParams.presMultiThreadTaskBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource;
6040 hevcExtCscParams.dwSizeMultiThreadTaskBuffer = m_threadTaskBufferSize;
6041 hevcExtCscParams.dwOffsetMultiThreadTaskBuffer = m_threadTaskBufferOffset;
6042 cscScalingKernelParams.hevcExtParams = &hevcExtCscParams;
6043 }
6044 else
6045 {
6046 cscScalingKernelParams.hevcExtParams = nullptr; // LCU32 does not require history buffers
6047 }
6048
6049 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
6050
6051 if (m_hmeEnabled)
6052 {
6053 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel());
6054 }
6055 else if (m_brcEnabled && m_hevcPicParams->CodingType == I_TYPE)
6056 {
6057 m_lastTaskInPhase = true;
6058
6059 CodechalKernelIntraDist::CurbeParam curbeParam;
6060 curbeParam.downScaledWidthInMb4x = m_downscaledWidthInMb4x;
6061 curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x;
6062
6063 CodechalKernelIntraDist::SurfaceParams surfaceParam;
6064 surfaceParam.input4xDsSurface =
6065 surfaceParam.input4xDsVmeSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
6066 surfaceParam.intraDistSurface = m_brcDistortion;
6067 surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset;
6068 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam));
6069 }
6070
6071 // BRC + MbEnc in second task phase
6072 m_firstTaskInPhase = true;
6073 m_lastTaskInPhase = false;
6074
6075 // Wait for PAK if necessary
6076 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
6077
6078 // ROI uses the BRC LCU update kernel, even in CQP. So we will call it
6079 if (m_hevcPicParams->NumROI && !m_brcEnabled)
6080 {
6081 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcLcuUpdateKernel());
6082 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6083
6084 CODECHAL_DEBUG_TOOL(
6085 if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) {
6086 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6087 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
6088 CodechalDbgAttr::attrOutput,
6089 "MbQp",
6090 m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
6091 m_brcBuffers.dwBrcMbQpBottomFieldOffset,
6092 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6093 }
6094 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6095 &m_brcDistortion->OsResource,
6096 CodechalDbgAttr::attrInput,
6097 "BrcDist_AfterLcuBrc",
6098 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6099 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6100 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6101 }
6102
6103 if (m_brcEnabled)
6104 {
6105 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcFrameUpdateKernel());
6106 CODECHAL_DEBUG_TOOL(
6107 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6108 &m_brcDistortion->OsResource,
6109 CodechalDbgAttr::attrInput,
6110 "BrcDist_AfterFrameBrc",
6111 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6112 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6113 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6114 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6115 &m_brcBuffers.resBrcHistoryBuffer,
6116 CodechalDbgAttr::attrOutput,
6117 "HistoryWrite",
6118 m_brcHistoryBufferSize,
6119 0,
6120 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6121 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6122 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
6123 CodechalDbgAttr::attrOutput,
6124 "ImgStateWrite",
6125 BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(),
6126 0,
6127 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6128 )
6129
6130 if (m_lcuBrcEnabled || m_hevcPicParams->NumROI)
6131 {
6132 // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result
6133 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcLcuUpdateKernel());
6134 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6135 }
6136 else
6137 {
6138 m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
6139 }
6140
6141 CODECHAL_DEBUG_TOOL(
6142 if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource))
6143 {
6144 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6145 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
6146 CodechalDbgAttr::attrOutput,
6147 "MbQp",
6148 m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight,
6149 m_brcBuffers.dwBrcMbQpBottomFieldOffset,
6150 CODECHAL_MEDIA_STATE_BRC_UPDATE));
6151 }
6152 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
6153 &m_brcDistortion->OsResource,
6154 CodechalDbgAttr::attrInput,
6155 "BrcDist_AfterLcuBrc",
6156 m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight,
6157 m_brcBuffers.dwMeBrcDistortionBottomFieldOffset,
6158 CODECHAL_MEDIA_STATE_BRC_UPDATE));)
6159 }
6160
6161 m_useWeightedSurfaceForL0 = false;
6162 m_useWeightedSurfaceForL1 = false;
6163
6164 //currently only support same weightoffset for all slices, and only support Luma weighted prediction
6165 auto slicetype = m_hevcSliceParams->slice_type;
6166 if (m_weightedPredictionSupported && !m_feiEnable &&
6167 ((slicetype == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) ||
6168 (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)))
6169 {
6170 uint32_t LumaWeightFlag[2] = {0}; //[L0, L1]
6171 CodechalEncodeWP::SliceParams sliceWPParams;
6172 memset((void *)&sliceWPParams, 0, sizeof(sliceWPParams));
6173
6174 //populate the slice WP parameter structure
6175 sliceWPParams.luma_log2_weight_denom = m_hevcSliceParams->luma_log2_weight_denom; // luma weidht denom
6176 for (auto i = 0; i < 2; i++)
6177 {
6178 for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
6179 {
6180 sliceWPParams.weights[i][j][0][0] = (1 << m_hevcSliceParams->luma_log2_weight_denom) +
6181 m_hevcSliceParams->delta_luma_weight[i][j]; //Luma weight
6182 sliceWPParams.weights[i][j][0][1] = m_hevcSliceParams->luma_offset[i][j]; //Luma offset
6183
6184 if (m_hevcSliceParams->delta_luma_weight[i][j] || m_hevcSliceParams->luma_offset[i][j])
6185 {
6186 LumaWeightFlag[i] |= (1 << j);
6187 }
6188 }
6189 }
6190
6191 CodechalEncodeWP::KernelParams wpKernelParams;
6192 memset((void *)&wpKernelParams, 0, sizeof(wpKernelParams));
6193 wpKernelParams.useWeightedSurfaceForL0 = &m_useWeightedSurfaceForL0;
6194 wpKernelParams.useWeightedSurfaceForL1 = &m_useWeightedSurfaceForL1;
6195 wpKernelParams.slcWPParams = &sliceWPParams;
6196
6197 // Weighted Prediction to be applied for L0
6198 for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1); i++)
6199 {
6200 if((LumaWeightFlag[LIST_0] & (1 << i)) && (i < CODEC_MAX_FORWARD_WP_FRAME))
6201 {
6202 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][i];
6203 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
6204 {
6205 MOS_SURFACE refFrameInput;
6206 uint8_t frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
6207 refFrameInput = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
6208
6209 //Weighted Prediction for ith forward reference frame
6210 wpKernelParams.useRefPicList1 = false;
6211 wpKernelParams.wpIndex = i;
6212 wpKernelParams.refFrameInput = &refFrameInput;
6213 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
6214 }
6215 }
6216 }
6217
6218 // Weighted Predition to be applied for L1
6219 if (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)
6220 {
6221 for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1); i++)
6222 {
6223 if((LumaWeightFlag[LIST_1] & (1 << i)) && (i < CODEC_MAX_BACKWARD_WP_FRAME))
6224 {
6225 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][i];
6226 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
6227 {
6228 MOS_SURFACE refFrameInput;
6229 uint8_t frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx;
6230 refFrameInput = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer;
6231
6232 //Weighted Prediction for ith backward reference frame
6233 wpKernelParams.useRefPicList1 = true;
6234 wpKernelParams.wpIndex = i;
6235 wpKernelParams.refFrameInput = &refFrameInput;
6236 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams));
6237 }
6238 }
6239 }
6240 }
6241 }
6242
6243 #if (_DEBUG || _RELEASE_INTERNAL)
6244
6245 MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData;
6246 // Weighted prediction for L0 Reporting
6247 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
6248 userFeatureWriteData.Value.i32Data = m_useWeightedSurfaceForL0;
6249 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_WEIGHTED_PREDICTION_L0_IN_USE_ID;
6250 MOS_UserFeature_WriteValues_ID(NULL, &userFeatureWriteData, 1, m_osInterface->pOsContext);
6251 // Weighted prediction for L1 Reporting
6252 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
6253 userFeatureWriteData.Value.i32Data = m_useWeightedSurfaceForL1;
6254 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_WEIGHTED_PREDICTION_L1_IN_USE_ID;
6255 MOS_UserFeature_WriteValues_ID(NULL, &userFeatureWriteData, 1, m_osInterface->pOsContext);
6256
6257 #endif // _DEBUG || _RELEASE_INTERNAL
6258
6259 // Reset to use a different performance tag ID
6260 m_osInterface->pfnResetPerfBufferID(m_osInterface);
6261
6262 m_lastTaskInPhase = true;
6263
6264 if (m_hevcPicParams->CodingType == I_TYPE)
6265 {
6266 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
6267 }
6268 else
6269 {
6270 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
6271 }
6272
6273 // Notify PAK engine once ENC is done
6274 if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
6275 {
6276 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
6277 syncParams.GpuContext = m_renderContext;
6278 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
6279
6280 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
6281 }
6282
6283 if (m_brcEnabled)
6284 {
6285 if (m_hevcSeqParams->ParallelBRC)
6286 {
6287 m_brcBuffers.uiCurrBrcPakStasIdxForRead =
6288 (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
6289 }
6290 }
6291
6292 CODECHAL_DEBUG_TOOL(
6293 uint8_t index;
6294 CODEC_PICTURE refPic;
6295 if (m_useWeightedSurfaceForL0) {
6296 refPic = m_hevcSliceParams->RefPicList[LIST_0][0];
6297 index = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
6298
6299 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6300 &m_refList[index]->sRefBuffer,
6301 CodechalDbgAttr::attrReferenceSurfaces,
6302 "WP_In_L0")));
6303
6304 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6305 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + 0),
6306 CodechalDbgAttr::attrReferenceSurfaces,
6307 "WP_Out_L0")));
6308 } if (m_useWeightedSurfaceForL1) {
6309
6310 refPic = m_hevcSliceParams->RefPicList[LIST_1][0];
6311 index = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
6312
6313 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6314 &m_refList[index]->sRefBuffer,
6315 CodechalDbgAttr::attrReferenceSurfaces,
6316 "WP_In_L1")));
6317
6318 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
6319 m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + 0),
6320 CodechalDbgAttr::attrReferenceSurfaces,
6321 "WP_Out_L1")));
6322 })
6323
6324 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6325 &m_scratchSurface,
6326 CodechalDbgAttr::attrInput,
6327 "Scratch_Surface",
6328 CODECHAL_MEDIA_STATE_HEVC_I_MBENC)));
6329
6330 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
6331 &m_intermediateCuRecordSurfaceLcu32,
6332 CodechalDbgAttr::attrInput,
6333 "IntermediateCURecord_Surface",
6334 CODECHAL_MEDIA_STATE_HEVC_I_MBENC)));
6335 pAppCallBack pCallBack;
6336 pCallBack = (pAppCallBack) m_encodeParams.plastEncKernelSubmissionCompleteCallback;
6337 if(pCallBack != NULL)
6338 pCallBack();
6339
6340 m_lastPictureCodingType = m_pictureCodingType;
6341 m_lastRecycledBufIdx = m_currRecycledBufIdx;
6342
6343 return eStatus;
6344 }
6345
InitKernelState()6346 MOS_STATUS CodechalEncHevcStateG11::InitKernelState()
6347 {
6348 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6349
6350 CODECHAL_ENCODE_FUNCTION_ENTER;
6351
6352 // Init kernel state
6353 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
6354 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
6355
6356 // Create weighted prediction kernel state
6357 CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState = MOS_New(CodechalEncodeWPG11, this));
6358 m_wpState->SetKernelBase(m_kernelBase);
6359 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->InitKernelState());
6360 // create intra distortion kernel
6361 m_intraDistKernel = MOS_New(CodechalKernelIntraDist, this);
6362 CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel);
6363 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Initialize(
6364 GetCommonKernelHeaderAndSizeG11,
6365 m_kernelBase,
6366 m_kuidCommon));
6367 // Create Hme kernel
6368 m_hmeKernel = MOS_New(CodechalKernelHmeG11, this);
6369 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
6370 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
6371 GetCommonKernelHeaderAndSizeG11,
6372 m_kernelBase,
6373 m_kuidCommon));
6374
6375 // Create SW scoreboard init kernel state
6376 CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardG11, this));
6377 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState());
6378
6379 return eStatus;
6380 }
6381
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6382 MOS_STATUS CodechalEncHevcStateG11::SetDmemHuCPakIntegrate(
6383 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
6384 {
6385 CODECHAL_ENCODE_FUNCTION_ENTER;
6386
6387 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6388
6389 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6390 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6391 lockFlagsWriteOnly.WriteOnly = true;
6392
6393 int32_t currentPass = GetCurrentPass();
6394 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES || !m_brcEnabled)
6395 {
6396 eStatus = MOS_STATUS_INVALID_PARAMETER;
6397 return eStatus;
6398 }
6399
6400 HucPakStitchDmemEncG11* hucPakStitchDmem = (HucPakStitchDmemEncG11*)m_osInterface->pfnLockResource(
6401 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6402 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6403
6404 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG11));
6405
6406 // reset all the offsets to -1
6407 uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
6408 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
6409 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
6410 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
6411 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
6412 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
6413 MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
6414
6415 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
6416 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6417 CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
6418 CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4
6419 uint16_t numTiles = numTileRows * numTileColumns;
6420 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
6421
6422 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
6423 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
6424 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
6425 hucPakStitchDmem->Codec = 1; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
6426 hucPakStitchDmem->MAXPass = m_brcEnabled ? (m_numPassesInOnePipe + 1) : 1;
6427 hucPakStitchDmem->CurrentPass = (uint8_t)currentPass + 1; // // Current BRC pass [1..MAXPass]
6428 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6429 hucPakStitchDmem->CabacZeroWordFlag = true; // to do: set to true later
6430 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
6431 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
6432 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
6433 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6434 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6435 hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
6436 hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
6437
6438 hucPakStitchDmem->StitchEnable = true;
6439 hucPakStitchDmem->StitchCommandOffset = 0;
6440 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6441 hucPakStitchDmem->brcUnderFlowEnable = false; //temporally disable underflow bit rate control in HUC fw since it need more tuning.
6442
6443 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
6444 CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
6445 uint32_t totalSliceHeaderSize = 0;
6446 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
6447 {
6448 totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
6449 slcData++;
6450 }
6451 hucPakStitchDmem->SliceHeaderSizeinBits = totalSliceHeaderSize * 8;
6452 hucPakStitchDmem->currFrameBRClevel = m_currFrameBrcLevel;
6453
6454 //Set the kernel output offsets
6455 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6456 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = m_hevcFrameStatsOffset.uiHevcPakStatistics;
6457 hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
6458 hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF;
6459
6460 for (auto i = 0; i < m_numPipe; i++)
6461 {
6462 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6463
6464 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6465 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6466 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
6467 m_hevcTileStatsOffset.uiTileSizeRecord;
6468 hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) +
6469 m_hevcTileStatsOffset.uiHevcPakStatistics;
6470 }
6471
6472 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6473
6474 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6475 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6476 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG11), CODECHAL_CACHELINE_SIZE);
6477 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6478
6479 return eStatus;
6480 }
6481
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6482 MOS_STATUS CodechalEncHevcStateG11::SetRegionsHuCPakIntegrate(
6483 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
6484 {
6485 CODECHAL_ENCODE_FUNCTION_ENTER;
6486
6487 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6488
6489 int32_t currentPass = GetCurrentPass();
6490 if (currentPass < 0 ||
6491 (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) ||
6492 (currentPass != 0 && m_cqpEnabled))
6493 {
6494 eStatus = MOS_STATUS_INVALID_PARAMETER;
6495 return eStatus;
6496 }
6497
6498 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6499
6500 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6501 // Add Virtual addr
6502 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
6503 virtualAddrParams->regionParams[0].dwOffset = 0;
6504 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
6505 virtualAddrParams->regionParams[1].isWritable = true;
6506 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
6507 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
6508 virtualAddrParams->regionParams[5].isWritable = true;
6509 virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
6510 virtualAddrParams->regionParams[6].isWritable = true;
6511 virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command
6512 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
6513 virtualAddrParams->regionParams[9].isWritable = true;
6514 virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation
6515 virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
6516 virtualAddrParams->regionParams[10].isWritable = true;
6517 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
6518 virtualAddrParams->regionParams[15].dwOffset = 0;
6519
6520 return eStatus;
6521 }
6522
SetDmemHuCPakIntegrateCqp(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6523 MOS_STATUS CodechalEncHevcStateG11::SetDmemHuCPakIntegrateCqp(
6524 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
6525 {
6526 CODECHAL_ENCODE_FUNCTION_ENTER;
6527
6528 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6529
6530 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6531 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6532 lockFlagsWriteOnly.WriteOnly = true;
6533
6534 int32_t currentPass = GetCurrentPass();
6535 if (currentPass != 0 || (!m_cqpEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ))
6536 {
6537 eStatus = MOS_STATUS_INVALID_PARAMETER;
6538 return eStatus;
6539 }
6540
6541 HucPakStitchDmemEncG11* hucPakStitchDmem = (HucPakStitchDmemEncG11*)m_osInterface->pfnLockResource(
6542 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6543 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6544
6545 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG11));
6546
6547 // reset all the offsets to -1
6548 uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
6549 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
6550 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
6551 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
6552 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
6553 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
6554 MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
6555
6556 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
6557 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6558 CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
6559 CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4
6560 uint16_t numTiles = numTileRows * numTileColumns;
6561 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
6562
6563 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
6564 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
6565 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
6566 hucPakStitchDmem->Codec = 2; //HEVC DP CQP
6567 hucPakStitchDmem->MAXPass = 1;
6568 hucPakStitchDmem->CurrentPass = 1;
6569 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6570 hucPakStitchDmem->CabacZeroWordFlag = true;
6571 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
6572 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
6573 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
6574 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6575 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6576 hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
6577 hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
6578
6579 hucPakStitchDmem->StitchEnable = true;
6580 hucPakStitchDmem->StitchCommandOffset = 0;
6581 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6582
6583 //Set the kernel output offsets
6584 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6585 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = 0xFFFFFFFF;
6586 hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
6587 hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF;
6588
6589 for (auto i = 0; i < m_numPipe; i++)
6590 {
6591 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6592
6593 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6594 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6595 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
6596 m_hevcTileStatsOffset.uiTileSizeRecord;
6597 }
6598
6599 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6600
6601 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6602 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6603 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG11), CODECHAL_CACHELINE_SIZE);
6604 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6605
6606 return eStatus;
6607 }
6608
ConfigStitchDataBuffer()6609 MOS_STATUS CodechalEncHevcStateG11::ConfigStitchDataBuffer()
6610 {
6611 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6612 CODECHAL_ENCODE_FUNCTION_ENTER;
6613 int32_t currentPass = GetCurrentPass();
6614 if (currentPass < 0 ||
6615 (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) ||
6616 (currentPass != 0 && m_cqpEnabled))
6617 {
6618 eStatus = MOS_STATUS_INVALID_PARAMETER;
6619 return eStatus;
6620 }
6621
6622 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6623 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6624 lockFlagsWriteOnly.WriteOnly = 1;
6625
6626 HucCommandData* hucStitchDataBuf = (HucCommandData*)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6627 CODECHAL_ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
6628
6629 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
6630 hucStitchDataBuf->TotalCommands = 1;
6631 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
6632
6633 HucInputCmdG11 hucInputCmd;
6634 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG11));
6635
6636 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
6637 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
6638 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
6639 hucInputCmd.LengthOfTable = (uint8_t)(m_numTiles);
6640 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;;
6641
6642 PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;
6643
6644 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6645 m_osInterface,
6646 presSrc,
6647 false,
6648 false));
6649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6650 m_osInterface,
6651 &m_resBitstreamBuffer,
6652 true,
6653 true));
6654
6655 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
6656 uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
6657 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
6658 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
6659
6660 hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
6661 hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
6662
6663 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG11), &hucInputCmd, sizeof(HucInputCmdG11));
6664
6665 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
6666
6667 return eStatus;
6668 }
6669
SetRegionsHuCPakIntegrateCqp(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6670 MOS_STATUS CodechalEncHevcStateG11::SetRegionsHuCPakIntegrateCqp(
6671 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
6672 {
6673 CODECHAL_ENCODE_FUNCTION_ENTER;
6674
6675 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6676
6677 int32_t currentPass = GetCurrentPass();
6678 if (currentPass < 0 ||
6679 (m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ && m_brcEnabled) ||
6680 (currentPass != 0 && m_cqpEnabled))
6681 {
6682 eStatus = MOS_STATUS_INVALID_PARAMETER;
6683 return eStatus;
6684 }
6685 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6686
6687 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6688
6689 // Add Virtual addr
6690 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
6691 virtualAddrParams->regionParams[0].dwOffset = 0;
6692 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
6693 virtualAddrParams->regionParams[1].isWritable = true;
6694 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
6695 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
6696 virtualAddrParams->regionParams[5].isWritable = true;
6697 virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
6698 virtualAddrParams->regionParams[6].isWritable = true;
6699 virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command
6700
6701 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
6702 virtualAddrParams->regionParams[9].isWritable = true;
6703 virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation
6704 virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
6705 virtualAddrParams->regionParams[10].isWritable = true;
6706 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
6707 virtualAddrParams->regionParams[15].dwOffset = 0;
6708
6709 return eStatus;
6710 }
6711
6712 #if (_DEBUG || _RELEASE_INTERNAL)
ResetImgCtrlRegInPAKStatisticsBuffer(PMOS_COMMAND_BUFFER cmdBuffer)6713 MOS_STATUS CodechalEncHevcStateG11::ResetImgCtrlRegInPAKStatisticsBuffer(
6714 PMOS_COMMAND_BUFFER cmdBuffer)
6715 {
6716 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6717
6718 CODECHAL_ENCODE_FUNCTION_ENTER;
6719
6720 MHW_MI_STORE_DATA_PARAMS storeDataParams;
6721 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
6722 storeDataParams.pOsResource = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6723 storeDataParams.dwResourceOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
6724 storeDataParams.dwValue = 0;
6725 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
6726 cmdBuffer,
6727 &storeDataParams));
6728
6729 return eStatus;
6730 }
6731 #endif
6732
ReadBrcPakStatisticsForScalability(PMOS_COMMAND_BUFFER cmdBuffer)6733 MOS_STATUS CodechalEncHevcStateG11::ReadBrcPakStatisticsForScalability(
6734 PMOS_COMMAND_BUFFER cmdBuffer)
6735 {
6736 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6737
6738 CODECHAL_ENCODE_FUNCTION_ENTER;
6739
6740 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
6741
6742 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
6743 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
6744 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
6745 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCount);
6746 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6747 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME);
6748 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
6749
6750 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
6751 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
6752 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCountNoHeader);
6753 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6754 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER);
6755 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
6756
6757 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
6758 miCpyMemMemParams.presSrc = &m_resBrcDataBuffer;
6759 miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl);
6760 miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite];
6761 miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
6762 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
6763
6764 uint32_t dwOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
6765 m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset
6766 sizeof(uint32_t)* 2; // encodeStatus is offset by 2 DWs in the resource
6767
6768 MHW_MI_STORE_DATA_PARAMS storeDataParams;
6769 storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
6770 storeDataParams.dwResourceOffset = dwOffset;
6771 storeDataParams.dwValue = (uint8_t)GetCurrentPass();
6772 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
6773
6774 return eStatus;
6775 }
6776
DumpHucDebugOutputBuffers()6777 MOS_STATUS CodechalEncHevcStateG11::DumpHucDebugOutputBuffers()
6778 {
6779 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6780
6781 //only dump HuC in/out buffers in brc scalability case
6782 bool dumpDebugBuffers = IsLastPipe() && (m_numPipe >= 2) && m_brcEnabled;
6783 if (m_singleTaskPhaseSupported)
6784 {
6785 dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
6786 }
6787
6788 if (dumpDebugBuffers)
6789 {
6790 CODECHAL_DEBUG_TOOL(
6791 int32_t currentPass = GetCurrentPass();
6792 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
6793 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
6794 sizeof(HucPakStitchDmemEncG11),
6795 currentPass,
6796 hucRegionDumpPakIntegrate));
6797
6798 // Region 7 - HEVC PIC State Command
6799 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6800 &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
6801 0,
6802 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
6803 7,
6804 "_PicState",
6805 true,
6806 currentPass,
6807 hucRegionDumpPakIntegrate));
6808
6809 // Region 5 - Last Tile PAK Bitstream Output
6810 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6811 &m_resBitstreamBuffer,
6812 0,
6813 m_encodeParams.dwBitstreamSize,
6814 5,
6815 "_Bitstream",
6816 false,
6817 currentPass,
6818 hucRegionDumpPakIntegrate));
6819
6820 // Region 6 - BRC History buffer
6821 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6822 &m_brcBuffers.resBrcHistoryBuffer,
6823 0,
6824 m_brcHistoryBufferSize,
6825 6,
6826 "_HistoryBuffer",
6827 false,
6828 currentPass,
6829 hucRegionDumpPakIntegrate));
6830 // Region 9 - HCP BRC Data Output
6831 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6832 &m_resBrcDataBuffer,
6833 0,
6834 CODECHAL_CACHELINE_SIZE,
6835 9,
6836 "_HcpBrcData",
6837 false,
6838 currentPass,
6839 hucRegionDumpPakIntegrate));
6840 // Region 1 - Output Aggregated Frame Level Statistics
6841 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6842 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
6843 0,
6844 m_hwInterface->m_pakIntAggregatedFrameStatsSize, // program exact out size
6845 1,
6846 "_AggregateFrameStats",
6847 false,
6848 currentPass,
6849 hucRegionDumpPakIntegrate));
6850 // Region 0 - Tile Statistics Constant Buffer
6851 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6852 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
6853 0,
6854 m_hwInterface->m_pakIntTileStatsSize,
6855 0,
6856 "_TileBasedStats",
6857 true,
6858 currentPass,
6859 hucRegionDumpPakIntegrate));
6860 // Region 15 - Tile Record Buffer
6861 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
6862 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
6863 0,
6864 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
6865 15,
6866 "_TileRecord",
6867 false,
6868 currentPass,
6869 hucRegionDumpPakIntegrate));)
6870 }
6871
6872 return eStatus;
6873 }
6874
//!
//! \brief    Construct the GEN11 HEVC encoder state.
//! \details  Forwards the three interfaces to the CodechalEncHevcState base, sets the
//!           GEN11 feature flags and kernel identifiers, zeroes the resource/surface
//!           members owned by this class, and then sizes the state heaps and looks up the
//!           combined HEVC kernel binary.
//! \param    [in] hwInterface
//!           Hardware interface, passed on to the base class.
//! \param    [in] debugInterface
//!           Debug interface, passed on to the base class.
//! \param    [in] standardInfo
//!           Codec standard information, passed on to the base class.
//!
CodechalEncHevcStateG11::CodechalEncHevcStateG11(
    CodechalHwInterface* hwInterface,
    CodechalDebugInterface* debugInterface,
    PCODECHAL_STANDARD_INFO standardInfo)
    :CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
{
    // Chained assignment: m_useCommonKernel and m_2xMeSupported are both set to true.
    m_2xMeSupported =
    m_useCommonKernel = true;
    m_useHwScoreboard = false;
    // The kernel image is only referenced when _FULL_OPEN_SOURCE is not defined;
    // otherwise the kernel base stays null.
#ifndef _FULL_OPEN_SOURCE
    m_kernelBase = (uint8_t*)IGCODECKRN_G11;
#else
    m_kernelBase = nullptr;
#endif
    m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
    m_hucPakStitchEnabled = true;
    m_scalabilityState = nullptr;

    // Zero the surface / buffer members. Members passed without '&' are arrays,
    // so sizeof() covers every element.
    MOS_ZeroMemory(&m_currPicWithReconBoundaryPix, sizeof(m_currPicWithReconBoundaryPix));
    MOS_ZeroMemory(&m_lcuLevelInputDataSurface, sizeof(m_lcuLevelInputDataSurface));
    MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu32, sizeof(m_intermediateCuRecordSurfaceLcu32));
    MOS_ZeroMemory(&m_scratchSurface, sizeof(m_scratchSurface));
    MOS_ZeroMemory(m_debugSurface, sizeof(m_debugSurface));
    MOS_ZeroMemory(&m_encConstantTableForB, sizeof(m_encConstantTableForB));
    MOS_ZeroMemory(&m_mvAndDistortionSumSurface, sizeof(m_mvAndDistortionSumSurface));
    MOS_ZeroMemory(m_encBCombinedBuffer1, sizeof(m_encBCombinedBuffer1));
    MOS_ZeroMemory(m_encBCombinedBuffer2, sizeof(m_encBCombinedBuffer2));

    // PAK streamout, tile statistics / records and scalability sync resources.
    MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
    MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
    MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
    MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
    MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
    MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
    MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));

    // Virtual-engine batch buffers, semaphores and HuC PAK-stitch / BRC data buffers.
    MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
    MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
    MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
    MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
    MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
    MOS_ZeroMemory(&m_resPipeCompleteSemaMem, sizeof(m_resPipeCompleteSemaMem));
    MOS_ZeroMemory(m_resHucPakStitchDmemBuffer, sizeof(m_resHucPakStitchDmemBuffer));
    MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));

    // NOTE(review): presumably leaves the constructor early when m_osInterface is null,
    // in which case none of the settings below are applied - confirm the macro definition.
    // m_hwInterface is dereferenced right after without a check of its own.
    CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
    m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
    m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;

    // Look up the combined HEVC kernel binary; m_kuid must be set before the lookup.
    // A failure is only asserted, not propagated (a constructor has no return value).
    m_kuid = IDR_CODEC_AllHEVCEnc;
    MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
        m_kernelBase,
        m_kuid,
        &m_kernelBinary,
        &m_combinedKernelSize);
    CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);

    // Grow the instruction state heap by the kernel size, rounded up to the kernel offset alignment.
    m_hwInterface->GetStateHeapSettings()->dwIshSize +=
        MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));

    // Virtual engine setup on the OS interface.
    m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
    Mos_SetVirtualEngineSupported(m_osInterface, true);
}
6938
~CodechalEncHevcStateG11()6939 CodechalEncHevcStateG11::~CodechalEncHevcStateG11()
6940 {
6941 CODECHAL_ENCODE_FUNCTION_ENTER;
6942
6943 if (m_wpState)
6944 {
6945 MOS_Delete(m_wpState);
6946 m_wpState = nullptr;
6947 }
6948 MOS_Delete(m_intraDistKernel);
6949
6950 if (m_swScoreboardState)
6951 {
6952 MOS_Delete(m_swScoreboardState);
6953 m_swScoreboardState = nullptr;
6954 }
6955
6956 if (m_scalabilityState)
6957 {
6958 MOS_FreeMemAndSetNull(m_scalabilityState);
6959 }
6960 //Note: virtual engine interface destroy is done in MOS layer
6961 }
6962
CodecHalHevcGetFileSize(char * fileName)6963 static uint32_t CodecHalHevcGetFileSize(char* fileName)
6964 {
6965 FILE* fp = nullptr;
6966 uint32_t fileSize = 0;
6967 MosUtilities::MosSecureFileOpen(&fp, fileName, "rb");
6968 if (fp == nullptr)
6969 {
6970 return 0;
6971 }
6972 fseek(fp, 0, SEEK_END);
6973 fileSize = ftell(fp);
6974 fseek(fp, 0, SEEK_SET);
6975 fclose(fp);
6976
6977 return fileSize;
6978 }
6979
LoadPakCommandAndCuRecordFromFile()6980 MOS_STATUS CodechalEncHevcStateG11::LoadPakCommandAndCuRecordFromFile()
6981 {
6982 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6983
6984 CODECHAL_ENCODE_FUNCTION_ENTER;
6985
6986 char pathOfPakCmd[MOS_USER_CONTROL_MAX_DATA_SIZE];
6987 MOS_SecureStringPrint(pathOfPakCmd,
6988 sizeof(pathOfPakCmd),
6989 sizeof(pathOfPakCmd),
6990 "%s\\PAKObj.dat.%d",
6991 m_pakOnlyDataFolder,
6992 m_frameNum);
6993
6994 char pathOfCuRecord[MOS_USER_CONTROL_MAX_DATA_SIZE];
6995 MOS_SecureStringPrint(pathOfCuRecord,
6996 sizeof(pathOfCuRecord),
6997 sizeof(pathOfCuRecord),
6998 "%s\\CURecord.dat.%d",
6999 m_pakOnlyDataFolder,
7000 m_frameNum);
7001
7002 uint32_t sizePakObj = CodecHalHevcGetFileSize(pathOfPakCmd);
7003 if(sizePakObj == 0 || sizePakObj > m_mvOffset)
7004 {
7005 return MOS_STATUS_INVALID_FILE_SIZE;
7006 }
7007
7008 uint32_t sizeCuRecord = CodecHalHevcGetFileSize(pathOfCuRecord);
7009 if(sizeCuRecord == 0 || sizeCuRecord > m_mbCodeSize - m_mvOffset)
7010 {
7011 return MOS_STATUS_INVALID_FILE_SIZE;
7012 }
7013
7014 MOS_LOCK_PARAMS lockFlags;
7015 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
7016 lockFlags.WriteOnly = 1;
7017 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
7018 m_osInterface, &m_resMbCodeSurface, &lockFlags);
7019 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
7020
7021 FILE* pakObj = nullptr;
7022 eStatus = MosUtilities::MosSecureFileOpen(&pakObj, pathOfPakCmd, "rb");
7023 if (pakObj == nullptr)
7024 {
7025 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7026 return eStatus;
7027 }
7028
7029 uint8_t* pakCmd = data;
7030 if(sizePakObj != fread((void*)pakCmd, 1, sizePakObj, pakObj))
7031 {
7032 fclose(pakObj);
7033 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7034 return MOS_STATUS_INVALID_FILE_SIZE;
7035 }
7036 fclose(pakObj);
7037
7038 uint8_t* record = data + m_mvOffset;
7039 FILE* fRecord = nullptr;
7040 eStatus = MosUtilities::MosSecureFileOpen(&fRecord, pathOfCuRecord, "rb");
7041 if (fRecord == nullptr)
7042 {
7043 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7044 return eStatus;
7045 }
7046
7047 if(sizeCuRecord != fread((void*)record, 1, sizeCuRecord, fRecord))
7048 {
7049 fclose(fRecord);
7050 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7051 return MOS_STATUS_INVALID_FILE_SIZE;
7052 }
7053 fclose(fRecord);
7054
7055 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
7056
7057 return eStatus;
7058 }
7059
ResizeOnResChange()7060 void CodechalEncHevcStateG11::ResizeOnResChange()
7061 {
7062 CODECHAL_ENCODE_FUNCTION_ENTER;
7063
7064 CodechalEncoderState::ResizeOnResChange();
7065
7066 // need to re-allocate surfaces according to resolution
7067 m_swScoreboardState->ReleaseResources();
7068 }
7069
ResizeBufferOffset()7070 void CodechalEncHevcStateG11::ResizeBufferOffset()
7071 {
7072 CODECHAL_ENCODE_FUNCTION_ENTER;
7073
7074 uint32_t size = 0;
7075 uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64;
7076 MBENC_COMBINED_BUFFER2 fixedBuf;
7077
7078 //Re-Calculate m_encBCombinedBuffer2 Size and Offsets
7079 m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE);
7080 m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE);
7081
7082 size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize;
7083
7084 m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE);
7085 m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize;
7086 }
7087
PicCodingTypeToSliceType(uint16_t pictureCodingType)7088 uint8_t CodechalEncHevcStateG11::PicCodingTypeToSliceType(uint16_t pictureCodingType)
7089 {
7090 uint8_t sliceType = 0;
7091
7092 switch (pictureCodingType)
7093 {
7094 case I_TYPE:
7095 sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
7096 break;
7097 case P_TYPE:
7098 sliceType = CODECHAL_ENCODE_HEVC_P_SLICE;
7099 break;
7100 case B_TYPE:
7101 case B1_TYPE:
7102 case B2_TYPE:
7103 sliceType = CODECHAL_ENCODE_HEVC_B_SLICE;
7104 break;
7105 default:
7106 CODECHAL_ENCODE_ASSERT(false);
7107 }
7108 return sliceType;
7109 }
7110
7111 // The following code is from the kernel ULT
InitMediaObjectWalker(uint32_t threadSpaceWidth,uint32_t threadSpaceHeight,uint32_t colorCountMinusOne,DependencyPattern dependencyPattern,uint32_t childThreadNumber,uint32_t localLoopExecCount,MHW_WALKER_PARAMS & walkerParams)7112 MOS_STATUS CodechalEncHevcStateG11::InitMediaObjectWalker(
7113 uint32_t threadSpaceWidth,
7114 uint32_t threadSpaceHeight,
7115 uint32_t colorCountMinusOne,
7116 DependencyPattern dependencyPattern,
7117 uint32_t childThreadNumber,
7118 uint32_t localLoopExecCount,
7119 MHW_WALKER_PARAMS& walkerParams)
7120 {
7121 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7122
7123 walkerParams.ColorCountMinusOne = colorCountMinusOne;
7124 walkerParams.dwGlobalLoopExecCount = 0x3ff;
7125 walkerParams.dwLocalLoopExecCount = 0x3ff;
7126
7127 if (dependencyPattern == dependencyWavefrontHorizontal)
7128 {
7129 // Global
7130 walkerParams.GlobalResolution.x = threadSpaceWidth;
7131 walkerParams.GlobalResolution.y = threadSpaceHeight;
7132 walkerParams.GlobalStart.x = 0;
7133 walkerParams.GlobalStart.y = 0;
7134 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7135 walkerParams.GlobalOutlerLoopStride.y = 0;
7136 walkerParams.GlobalInnerLoopUnit.x = 0;
7137 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7138
7139 // Local
7140 walkerParams.BlockResolution.x = threadSpaceWidth;
7141 walkerParams.BlockResolution.y = threadSpaceHeight;
7142 walkerParams.LocalStart.x = 0;
7143 walkerParams.LocalStart.y = 0;
7144 walkerParams.LocalOutLoopStride.x = 1;
7145 walkerParams.LocalOutLoopStride.y = 0;
7146 walkerParams.LocalInnerLoopUnit.x = 0;
7147 walkerParams.LocalInnerLoopUnit.y = 1;
7148
7149 // Mid
7150 walkerParams.MiddleLoopExtraSteps = 0;
7151 walkerParams.MidLoopUnitX = 0;
7152 walkerParams.MidLoopUnitY = 0;
7153 }
7154 else
7155 if (dependencyPattern == dependencyWavefrontVertical)
7156 {
7157 // Global
7158 walkerParams.GlobalResolution.x = threadSpaceWidth;
7159 walkerParams.GlobalResolution.y = threadSpaceHeight;
7160 walkerParams.GlobalStart.x = 0;
7161 walkerParams.GlobalStart.y = 0;
7162 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7163 walkerParams.GlobalOutlerLoopStride.y = 0;
7164 walkerParams.GlobalInnerLoopUnit.x = 0;
7165 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7166
7167 // Local
7168 walkerParams.BlockResolution.x = threadSpaceWidth;
7169 walkerParams.BlockResolution.y = threadSpaceHeight;
7170 walkerParams.LocalStart.x = 0;
7171 walkerParams.LocalStart.y = 0;
7172 walkerParams.LocalOutLoopStride.x = 0;
7173 walkerParams.LocalOutLoopStride.y = 1;
7174 walkerParams.LocalInnerLoopUnit.x = 1;
7175 walkerParams.LocalInnerLoopUnit.y = 0;
7176
7177 // Mid
7178 walkerParams.MiddleLoopExtraSteps = 0;
7179 walkerParams.MidLoopUnitX = 0;
7180 walkerParams.MidLoopUnitY = 0;
7181 }
7182 else
7183 if (dependencyPattern == dependencyWavefront45Degree)
7184 {
7185 // Global
7186 walkerParams.GlobalResolution.x = threadSpaceWidth;
7187 walkerParams.GlobalResolution.y = threadSpaceHeight;
7188 walkerParams.GlobalStart.x = 0;
7189 walkerParams.GlobalStart.y = 0;
7190 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7191 walkerParams.GlobalOutlerLoopStride.y = 0;
7192 walkerParams.GlobalInnerLoopUnit.x = 0;
7193 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7194
7195 // Local
7196 walkerParams.BlockResolution.x = threadSpaceWidth;
7197 walkerParams.BlockResolution.y = threadSpaceHeight;
7198 walkerParams.LocalStart.x = 0;
7199 walkerParams.LocalStart.y = 0;
7200 walkerParams.LocalOutLoopStride.x = 1;
7201 walkerParams.LocalOutLoopStride.y = 0;
7202 walkerParams.LocalInnerLoopUnit.x = -1;
7203 walkerParams.LocalInnerLoopUnit.y = 1;
7204
7205 // Mid
7206 walkerParams.MiddleLoopExtraSteps = 0;
7207 walkerParams.MidLoopUnitX = 0;
7208 walkerParams.MidLoopUnitY = 0;
7209 }
7210 else
7211 if (dependencyPattern == dependencyWavefront26Degree)
7212 {
7213 // Global
7214 walkerParams.GlobalResolution.x = threadSpaceWidth;
7215 walkerParams.GlobalResolution.y = threadSpaceHeight;
7216 walkerParams.GlobalStart.x = 0;
7217 walkerParams.GlobalStart.y = 0;
7218 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7219 walkerParams.GlobalOutlerLoopStride.y = 0;
7220 walkerParams.GlobalInnerLoopUnit.x = 0;
7221 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7222
7223 // Local
7224 walkerParams.BlockResolution.x = threadSpaceWidth;
7225 walkerParams.BlockResolution.y = threadSpaceHeight;
7226 walkerParams.LocalStart.x = 0;
7227 walkerParams.LocalStart.y = 0;
7228 walkerParams.LocalOutLoopStride.x = 1;
7229 walkerParams.LocalOutLoopStride.y = 0;
7230 walkerParams.LocalInnerLoopUnit.x = -2;
7231 walkerParams.LocalInnerLoopUnit.y = 1;
7232
7233 // Mid
7234 walkerParams.MiddleLoopExtraSteps = 0;
7235 walkerParams.MidLoopUnitX = 0;
7236 walkerParams.MidLoopUnitY = 0;
7237 }
7238 else
7239 if ((dependencyPattern == dependencyWavefront45XDegree) ||
7240 (dependencyPattern == dependencyWavefront45XDegreeAlt))
7241 {
7242 // Global
7243 walkerParams.GlobalResolution.x = threadSpaceWidth;
7244 walkerParams.GlobalResolution.y = threadSpaceHeight;
7245 walkerParams.GlobalStart.x = 0;
7246 walkerParams.GlobalStart.y = 0;
7247 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7248 walkerParams.GlobalOutlerLoopStride.y = 0;
7249 walkerParams.GlobalInnerLoopUnit.x = 0;
7250 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7251
7252 // Local
7253 walkerParams.BlockResolution.x = threadSpaceWidth;
7254 walkerParams.BlockResolution.y = threadSpaceHeight;
7255 walkerParams.LocalStart.x = 0;
7256 walkerParams.LocalStart.y = 0;
7257 walkerParams.LocalOutLoopStride.x = 1;
7258 walkerParams.LocalOutLoopStride.y = 0;
7259 walkerParams.LocalInnerLoopUnit.x = -1;
7260 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7261
7262 // Mid
7263 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7264 walkerParams.MidLoopUnitX = 0;
7265 walkerParams.MidLoopUnitY = 1;
7266 }
7267 else
7268 if ((dependencyPattern == dependencyWavefront26XDegree) ||
7269 (dependencyPattern == dependencyWavefront26XDegreeAlt)) {
7270
7271 // Global
7272 walkerParams.GlobalResolution.x = threadSpaceWidth;
7273 walkerParams.GlobalResolution.y = threadSpaceHeight;
7274 walkerParams.GlobalStart.x = 0;
7275 walkerParams.GlobalStart.y = 0;
7276 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7277 walkerParams.GlobalOutlerLoopStride.y = 0;
7278 walkerParams.GlobalInnerLoopUnit.x = 0;
7279 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7280
7281 // Local
7282 walkerParams.BlockResolution.x = threadSpaceWidth;
7283 walkerParams.BlockResolution.y = threadSpaceHeight;
7284 walkerParams.LocalStart.x = 0;
7285 walkerParams.LocalStart.y = 0;
7286 walkerParams.LocalOutLoopStride.x = 1;
7287 walkerParams.LocalOutLoopStride.y = 0;
7288 walkerParams.LocalInnerLoopUnit.x = -2;
7289 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7290
7291 // Mid
7292 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7293 walkerParams.MidLoopUnitX = 0;
7294 walkerParams.MidLoopUnitY = 1;
7295 }
7296 else
7297 if (dependencyPattern == dependencyWavefront45XVp9Degree)
7298 {
7299 // Global
7300 walkerParams.GlobalResolution.x = threadSpaceWidth;
7301 walkerParams.GlobalResolution.y = threadSpaceHeight;
7302 walkerParams.GlobalStart.x = 0;
7303 walkerParams.GlobalStart.y = 0;
7304 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7305 walkerParams.GlobalOutlerLoopStride.y = 0;
7306 walkerParams.GlobalInnerLoopUnit.x = 0;
7307 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7308
7309 // Local
7310 walkerParams.BlockResolution.x = threadSpaceWidth;
7311 walkerParams.BlockResolution.y = threadSpaceHeight;
7312 walkerParams.LocalStart.x = 0;
7313 walkerParams.LocalStart.y = 0;
7314 walkerParams.LocalOutLoopStride.x = 1;
7315 walkerParams.LocalOutLoopStride.y = 0;
7316 walkerParams.LocalInnerLoopUnit.x = -1;
7317 walkerParams.LocalInnerLoopUnit.y = 4;
7318
7319 // Mid
7320 walkerParams.MiddleLoopExtraSteps = 3;
7321 walkerParams.MidLoopUnitX = 0;
7322 walkerParams.MidLoopUnitY = 1;
7323 }
7324 else
7325 if (dependencyPattern == dependencyWavefront26ZDegree)
7326 {
7327 // Global
7328 walkerParams.GlobalResolution.x = threadSpaceWidth;
7329 walkerParams.GlobalResolution.y = threadSpaceHeight;
7330 walkerParams.GlobalStart.x = 0;
7331 walkerParams.GlobalStart.y = 0;
7332 walkerParams.GlobalOutlerLoopStride.x = 2;
7333 walkerParams.GlobalOutlerLoopStride.y = 0;
7334 walkerParams.GlobalInnerLoopUnit.x = -4;
7335 walkerParams.GlobalInnerLoopUnit.y = 2;
7336
7337 // Local
7338 walkerParams.BlockResolution.x = 2;
7339 walkerParams.BlockResolution.y = 2;
7340 walkerParams.LocalStart.x = 0;
7341 walkerParams.LocalStart.y = 0;
7342 walkerParams.LocalOutLoopStride.x = 0;
7343 walkerParams.LocalOutLoopStride.y = 1;
7344 walkerParams.LocalInnerLoopUnit.x = 1;
7345 walkerParams.LocalInnerLoopUnit.y = 0;
7346
7347 // Mid
7348 walkerParams.MiddleLoopExtraSteps = 0;
7349 walkerParams.MidLoopUnitX = 0;
7350 walkerParams.MidLoopUnitY = 0;
7351 }
7352 else
7353 if (dependencyPattern == dependencyWavefront26ZigDegree)
7354 {
7355 int32_t size_x = threadSpaceWidth;//(threadSpaceWidth + 1)>> 1;
7356 int32_t size_y = threadSpaceHeight;//threadSpaceHeight << 1;
7357
7358 // Global
7359 walkerParams.GlobalResolution.x = size_x;
7360 walkerParams.GlobalResolution.y = size_y;
7361 walkerParams.GlobalStart.x = 0;
7362 walkerParams.GlobalStart.y = 0;
7363 walkerParams.GlobalOutlerLoopStride.x = size_x;
7364 walkerParams.GlobalOutlerLoopStride.y = 0;
7365 walkerParams.GlobalInnerLoopUnit.x = 0;
7366 walkerParams.GlobalInnerLoopUnit.y = size_y;
7367
7368 // Local
7369 walkerParams.BlockResolution.x = size_x;
7370 walkerParams.BlockResolution.y = size_y;
7371 walkerParams.LocalStart.x = 0;
7372 walkerParams.LocalStart.y = 0;
7373 walkerParams.LocalOutLoopStride.x = 1;
7374 walkerParams.LocalOutLoopStride.y = 0;
7375 walkerParams.LocalInnerLoopUnit.x = -2;
7376 walkerParams.LocalInnerLoopUnit.y = 4;
7377
7378 // Mid
7379 walkerParams.MiddleLoopExtraSteps = 3;
7380 walkerParams.MidLoopUnitX = 0;
7381 walkerParams.MidLoopUnitY = 1;
7382 }
7383 else
7384 if (dependencyPattern == dependencyWavefront45DDegree)
7385 {
7386 // Global
7387 walkerParams.GlobalResolution.x = threadSpaceWidth;
7388 walkerParams.GlobalResolution.y = threadSpaceHeight;
7389 walkerParams.GlobalStart.x = 0;
7390 walkerParams.GlobalStart.y = 0;
7391 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7392 walkerParams.GlobalOutlerLoopStride.y = 0;
7393 walkerParams.GlobalInnerLoopUnit.x = 0;
7394 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7395
7396 // Local
7397 walkerParams.BlockResolution.x = threadSpaceWidth;
7398 walkerParams.BlockResolution.y = threadSpaceHeight;
7399 walkerParams.LocalStart.x = threadSpaceWidth;
7400 walkerParams.LocalStart.y = 0;
7401 walkerParams.LocalOutLoopStride.x = 1;
7402 walkerParams.LocalOutLoopStride.y = 0;
7403 walkerParams.LocalInnerLoopUnit.x = -1;
7404 walkerParams.LocalInnerLoopUnit.y = 1;
7405
7406 // Mid
7407 walkerParams.MiddleLoopExtraSteps = 0;
7408 walkerParams.MidLoopUnitX = 0;
7409 walkerParams.MidLoopUnitY = 0;
7410 if (colorCountMinusOne > 0)
7411 {
7412 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7413 }
7414 }
7415 else
7416 if (dependencyPattern == dependencyWavefront26DDegree)
7417 {
7418 // Global
7419 walkerParams.GlobalResolution.x = threadSpaceWidth;
7420 walkerParams.GlobalResolution.y = threadSpaceHeight;
7421 walkerParams.GlobalStart.x = 0;
7422 walkerParams.GlobalStart.y = 0;
7423 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7424 walkerParams.GlobalOutlerLoopStride.y = 0;
7425 walkerParams.GlobalInnerLoopUnit.x = 0;
7426 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7427 // Local
7428 walkerParams.BlockResolution.x = threadSpaceWidth;
7429 walkerParams.BlockResolution.y = threadSpaceHeight;
7430 walkerParams.LocalStart.x = threadSpaceWidth;
7431 walkerParams.LocalStart.y = 0;
7432 walkerParams.LocalOutLoopStride.x = 1;
7433 walkerParams.LocalOutLoopStride.y = 0;
7434 walkerParams.LocalInnerLoopUnit.x = -2;
7435 walkerParams.LocalInnerLoopUnit.y = 1;
7436 // Mid
7437 walkerParams.MiddleLoopExtraSteps = 0;
7438 walkerParams.MidLoopUnitX = 0;
7439 walkerParams.MidLoopUnitY = 0;
7440
7441 if (colorCountMinusOne > 0)
7442 {
7443 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7444 }
7445 }
7446 else
7447 if (dependencyPattern == dependencyWavefront45XDDegree)
7448 {
7449 // Global
7450 walkerParams.GlobalResolution.x = threadSpaceWidth;
7451 walkerParams.GlobalResolution.y = threadSpaceHeight;
7452 walkerParams.GlobalStart.x = 0;
7453 walkerParams.GlobalStart.y = 0;
7454 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7455 walkerParams.GlobalOutlerLoopStride.y = 0;
7456 walkerParams.GlobalInnerLoopUnit.x = 0;
7457 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7458
7459 // Local
7460 walkerParams.BlockResolution.x = threadSpaceWidth;
7461 walkerParams.BlockResolution.y = threadSpaceHeight;
7462 walkerParams.LocalStart.x = threadSpaceWidth;
7463 walkerParams.LocalStart.y = 0;
7464 walkerParams.LocalOutLoopStride.x = 1;
7465 walkerParams.LocalOutLoopStride.y = 0;
7466 walkerParams.LocalInnerLoopUnit.x = -1;
7467 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7468
7469 // Mid
7470 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7471 walkerParams.MidLoopUnitX = 0;
7472 walkerParams.MidLoopUnitY = 1;
7473 if (colorCountMinusOne > 0)
7474 {
7475 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7476 }
7477 }
7478 else
7479 if (dependencyPattern == dependencyWavefront26XDDegree)
7480 {
7481
7482 // Global
7483 walkerParams.GlobalResolution.x = threadSpaceWidth;
7484 walkerParams.GlobalResolution.y = threadSpaceHeight;
7485 walkerParams.GlobalStart.x = 0;
7486 walkerParams.GlobalStart.y = 0;
7487 walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth;
7488 walkerParams.GlobalOutlerLoopStride.y = 0;
7489 walkerParams.GlobalInnerLoopUnit.x = 0;
7490 walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight;
7491 // Local
7492 walkerParams.BlockResolution.x = threadSpaceWidth;
7493 walkerParams.BlockResolution.y = threadSpaceHeight;
7494 walkerParams.LocalStart.x = threadSpaceWidth;
7495 walkerParams.LocalStart.y = 0;
7496 walkerParams.LocalOutLoopStride.x = 1;
7497 walkerParams.LocalOutLoopStride.y = 0;
7498 walkerParams.LocalInnerLoopUnit.x = -2;
7499 walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1;
7500 // Mid
7501 walkerParams.MiddleLoopExtraSteps = childThreadNumber;
7502 walkerParams.MidLoopUnitX = 0;
7503 walkerParams.MidLoopUnitY = 1;
7504
7505 if (colorCountMinusOne > 0)
7506 {
7507 walkerParams.dwLocalLoopExecCount = localLoopExecCount;
7508 }
7509 }
7510 else
7511 {
7512 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported walking pattern is observed\n");
7513 eStatus = MOS_STATUS_INVALID_PARAMETER;
7514 }
7515 return eStatus;
7516 }
7517
IsDegree45Needed()7518 bool CodechalEncHevcStateG11::IsDegree45Needed()
7519 {
7520 if(m_numberConcurrentGroup == 1 && m_numberEncKernelSubThread == 1)
7521 {
7522 return false;
7523 }
7524 return true;
7525 }
7526
DecideConcurrentGroupAndWaveFrontNumber()7527 void CodechalEncHevcStateG11::DecideConcurrentGroupAndWaveFrontNumber()
7528 {
7529 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7530 uint32_t widthInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1), shift);
7531 uint32_t heightInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1), shift);
7532 DependencyPattern walkerDegree;
7533
7534 //As per kernel ULT,for all non TU1 cases m_numberEncKernelSubThread should be set to 1
7535 // LCU32 has no multiple thread support,
7536 if (!m_isMaxLcu64 || m_hevcSeqParams->TargetUsage != 1)
7537 {
7538 m_numberEncKernelSubThread = 1;
7539 }
7540
7541 while(heightInLcu / m_numberConcurrentGroup == 0)
7542 {
7543 m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
7544 if(m_numberConcurrentGroup == 0)
7545 {
7546 // Try out all values and now have to use the default ones.
7547 // Concurrent group and wave-front split must be enabled together
7548 m_numberConcurrentGroup = 1;
7549 break;
7550 }
7551 }
7552
7553 if (m_numberConcurrentGroup>1)
7554 {
7555 m_numWavefrontInOneRegion = 0;
7556 while(m_numWavefrontInOneRegion == 0)
7557 {
7558 uint32_t shift = m_degree45Needed ? 0 : 1;
7559
7560 m_numWavefrontInOneRegion =
7561 (widthInLcu + ((heightInLcu - 1) << shift) + m_numberConcurrentGroup - 1) / m_numberConcurrentGroup;
7562
7563 if(m_numWavefrontInOneRegion > 0 )
7564 {
7565 // this is a valid setting and number of regisions is greater than or equal to 1
7566 break;
7567 }
7568 m_numberConcurrentGroup = m_numberConcurrentGroup >> 1;
7569 if(m_numberConcurrentGroup ==0 )
7570 {
7571 // Try out all values and now have to use the default ones.
7572 m_numberConcurrentGroup = 1;
7573 break;
7574 }
7575 }
7576 }
7577 else
7578 {
7579 m_numWavefrontInOneRegion = 0;
7580 }
7581
7582 m_numberEncKernelSubThread = MOS_MIN(m_numberEncKernelSubThread, m_hevcThreadTaskDataNum);
7583
7584 return;
7585 }
7586
UserFeatureKeyReport()7587 MOS_STATUS CodechalEncHevcStateG11::UserFeatureKeyReport()
7588 {
7589 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7590
7591 CODECHAL_ENCODE_FUNCTION_ENTER;
7592
7593 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport());
7594
7595 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, m_numberConcurrentGroup, m_osInterface->pOsContext);
7596 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID, m_numberEncKernelSubThread, m_osInterface->pOsContext);
7597 #if (_DEBUG || _RELEASE_INTERNAL)
7598 CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
7599 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
7600 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
7601 #endif
7602
7603 if (m_pakOnlyTest)
7604 {
7605 CodecHalEncode_WriteStringKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID, m_pakOnlyDataFolder, strlen(m_pakOnlyDataFolder), m_osInterface->pOsContext);
7606 }
7607
7608 return eStatus;
7609 }
7610
SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams * params)7611 MOS_STATUS CodechalEncHevcStateG11::SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams *params)
7612 {
7613 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7614
7615 if (Mos_ResourceIsNull(&m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource))
7616 {
7617 MOS_ZeroMemory(m_swScoreboardState->GetCurSwScoreboardSurface(), sizeof(*m_swScoreboardState->GetCurSwScoreboardSurface()));
7618
7619 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
7620 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7621 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
7622 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
7623 allocParamsForBuffer2D.Format = Format_R32U;
7624 allocParamsForBuffer2D.dwWidth = params->swScoreboardSurfaceWidth;
7625 allocParamsForBuffer2D.dwHeight = params->swScoreboardSurfaceHeight;
7626 allocParamsForBuffer2D.pBufName = "SW Scoreboard Init buffer";
7627
7628 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
7629 m_osInterface,
7630 &allocParamsForBuffer2D,
7631 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
7632
7633 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
7634 m_osInterface,
7635 m_swScoreboardState->GetCurSwScoreboardSurface()));
7636 }
7637
7638 if(m_swScoreboard == nullptr)
7639 {
7640 m_swScoreboard = (uint8_t*)MOS_AllocAndZeroMemory(params->scoreboardWidth * sizeof(uint32_t)*params->scoreboardHeight);
7641 InitSWScoreboard(m_swScoreboard, params->scoreboardWidth, params->scoreboardHeight,
7642 m_swScoreboardState->GetDependencyPattern(),
7643 (char)(params->numberOfChildThread));
7644 }
7645
7646 MOS_LOCK_PARAMS lockFlags;
7647
7648 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
7649 lockFlags.WriteOnly = 1;
7650 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
7651 m_osInterface,
7652 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource,
7653 &lockFlags);
7654 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
7655
7656 for(uint32_t h = 0; h < params->scoreboardHeight; h++)
7657 {
7658 uint32_t s = params->scoreboardWidth * sizeof(uint32_t);
7659 MOS_SecureMemcpy(data, s, &m_swScoreboard[h*s], s);
7660 data += m_swScoreboardState->GetCurSwScoreboardSurface()->dwPitch;
7661 }
7662
7663 m_osInterface->pfnUnlockResource(
7664 m_osInterface,
7665 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource);
7666
7667 return eStatus;
7668 }
7669
SetDependency(uint8_t & numDependencies,char * scoreboardDeltaX,char * scoreboardDeltaY,uint32_t dependencyPattern,char childThreadNumber)7670 void CodechalEncHevcStateG11::SetDependency(
7671 uint8_t &numDependencies,
7672 char* scoreboardDeltaX,
7673 char* scoreboardDeltaY,
7674 uint32_t dependencyPattern,
7675 char childThreadNumber)
7676 {
7677 if (dependencyPattern == dependencyWavefrontHorizontal)
7678 {
7679 numDependencies = m_numDependencyHorizontal;
7680 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyHorizontal, m_dxWavefrontHorizontal, m_numDependencyHorizontal);
7681 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyHorizontal, m_dyWavefrontHorizontal, m_numDependencyHorizontal);
7682 }
7683 else if (dependencyPattern == dependencyWavefrontVertical)
7684 {
7685 numDependencies = m_numDependencyVertical;
7686 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyVertical, m_dxWavefrontVertical, m_numDependencyVertical);
7687 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyVertical, m_dyWavefrontVertical, m_numDependencyVertical);
7688 }
7689 else if (dependencyPattern == dependencyWavefront45Degree)
7690 {
7691 numDependencies = m_numDependency45Degree;
7692 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
7693 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
7694 }
7695 else if (dependencyPattern == dependencyWavefront26Degree ||
7696 dependencyPattern == dependencyWavefront26DDegree)
7697 {
7698 numDependencies = m_numDependency26Degree;
7699 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26Degree, m_dxWavefront26Degree, m_numDependency26Degree);
7700 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26Degree, m_dyWavefront26Degree, m_numDependency26Degree);
7701 }
7702 else if (dependencyPattern == dependencyWavefront45XDegree)
7703 {
7704 numDependencies = m_numDependency45xDegree;
7705 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegree, m_dxWavefront45xDegree, m_numDependency45xDegree);
7706 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegree, m_dyWavefront45xDegree, m_numDependency45xDegree);
7707 numDependencies = childThreadNumber + 2;
7708 scoreboardDeltaY[0] = childThreadNumber;
7709 }
7710 else if (dependencyPattern == dependencyWavefront26XDegree)
7711 {
7712 numDependencies = m_numDependency26xDegree;
7713 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegree, m_dxWavefront26xDegree, m_numDependency26xDegree);
7714 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegree, m_dyWavefront26xDegree, m_numDependency26xDegree);
7715 numDependencies = childThreadNumber + 3;
7716 scoreboardDeltaY[0] = childThreadNumber;
7717 }
7718 else if ((dependencyPattern == dependencyWavefront45XDegreeAlt) ||
7719 (dependencyPattern == dependencyWavefront45XDDegree))
7720 {
7721 numDependencies = m_numDependency45xDegreeAlt;
7722 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegreeAlt, m_dxWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
7723 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegreeAlt, m_dyWavefront45xDegreeAlt, m_numDependency45xDegreeAlt);
7724 scoreboardDeltaY[0] = childThreadNumber;
7725 }
7726 else if ((dependencyPattern == dependencyWavefront26XDegreeAlt) ||
7727 (dependencyPattern == dependencyWavefront26XDDegree))
7728 {
7729 numDependencies = m_numDependency26xDegreeAlt;
7730 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegreeAlt, m_dxWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
7731 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegreeAlt, m_dyWavefront26xDegreeAlt, m_numDependency26xDegreeAlt);
7732 scoreboardDeltaY[0] = childThreadNumber;
7733 }
7734 else if (dependencyPattern == dependencyWavefront45XVp9Degree)
7735 {
7736 numDependencies = m_numDependency45xVp9Degree;
7737 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xVp9Degree, m_dxWavefront45xVp9Degree, m_numDependency45xVp9Degree);
7738 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xVp9Degree, m_dyWavefront45xVp9Degree, m_numDependency45xVp9Degree);
7739 }
7740 else if (dependencyPattern == dependencyWavefront26ZDegree)
7741 {
7742 numDependencies = m_numDependency26zDegree;
7743 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26zDegree, m_dxWavefront26zDegree, m_numDependency26zDegree);
7744 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26zDegree, m_dyWavefront26zDegree, m_numDependency26zDegree);
7745 }
7746 else if (dependencyPattern == dependencyWavefront26ZigDegree)
7747 {
7748 numDependencies = m_numDependency26ZigDegree;
7749 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26ZigDegree, m_dxWavefront26ZigDegree, m_numDependency26ZigDegree);
7750 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26ZigDegree, m_dyWavefront26ZigDegree, m_numDependency26ZigDegree);
7751 }
7752 else if (dependencyPattern == dependencyWavefront45DDegree)
7753 {
7754 numDependencies = m_numDependency45Degree;
7755 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree);
7756 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree);
7757 }
7758 else
7759 {
7760 numDependencies = m_numDependencyNone;
7761 MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyNone, m_dxWavefrontNone, m_numDependencyNone);
7762 MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyNone, m_dyWavefrontNone, m_numDependencyNone);
7763 }
7764 }
7765
7766 // ========================================================================================
7767 // FUNCTION: InitSWScoreboard
7768 // DESCRIPTION: Initialize software scoreboard for a specific dependency pattern.
7769 // INPUTS: scoreboardWidth - Width of scoreboard in Entries
7770 // scoreboardHeight - Height of scoreboard in Entries
7771 // dependencyPattern - The Enumeration of the Dependency Pattern
7772 // OUTPUTS: scoreboard - Pointer to scoreboard in Memory
7773 // ========================================================================================
InitSWScoreboard(uint8_t * scoreboard,uint32_t scoreboardWidth,uint32_t scoreboardHeight,uint32_t dependencyPattern,char childThreadNumber)7774 void CodechalEncHevcStateG11::InitSWScoreboard(uint8_t* scoreboard, uint32_t scoreboardWidth, uint32_t scoreboardHeight, uint32_t dependencyPattern, char childThreadNumber)
7775 {
7776 // 1. Select Dependency Pattern
7777 uint8_t numDependencies;
7778 char scoreboardDeltaX[m_maxNumDependency];
7779 char scoreboardDeltaY[m_maxNumDependency];
7780 memset(scoreboardDeltaX, 0, sizeof(scoreboardDeltaX));
7781 memset(scoreboardDeltaY, 0, sizeof(scoreboardDeltaY));
7782
7783 SetDependency(numDependencies, scoreboardDeltaX, scoreboardDeltaY, dependencyPattern, childThreadNumber);
7784
7785 // 2. Initialize scoreboard (CPU Based)
7786 int32_t dependentLocationX = 0;
7787 int32_t dependentLocationY = 0;
7788 uint32_t* scoreboardInDws = (uint32_t*)scoreboard;
7789 int32_t totalThreadNumber = childThreadNumber + 1;
7790 for (int32_t y = 0; y < (int32_t)scoreboardHeight; y += totalThreadNumber)
7791 {
7792 for (int32_t x = 0; x < (int32_t)scoreboardWidth; x++)
7793 {
7794 scoreboardInDws[y*scoreboardWidth + x] = 0;
7795
7796 // Add dependencies accordingly
7797 for (int32_t i = 0; i < numDependencies; i++)
7798 {
7799 dependentLocationX = x + scoreboardDeltaX[i];
7800 dependentLocationY = y + scoreboardDeltaY[i];
7801 if ((dependentLocationX < 0) || (dependentLocationY < 0) ||
7802 (dependentLocationX >= (int32_t)scoreboardWidth) ||
7803 (dependentLocationY >= (int32_t)scoreboardHeight))
7804 {
7805 // Do not add dependency because thread does not exist
7806 }
7807 else
7808 {
7809 scoreboardInDws[y*scoreboardWidth + x] |= (1 << i);
7810 }
7811 } // End NumDep
7812 } // End x
7813
7814 for (int32_t n = y + 1; n<y + totalThreadNumber; n++)
7815 {
7816 for (int32_t k = 0; k < (int32_t)scoreboardWidth; k++)
7817 {
7818 scoreboardInDws[n*scoreboardWidth + k] = scoreboardInDws[y*scoreboardWidth + k];
7819 }
7820 }
7821
7822 } // End y
7823 }
7824
//!
//! \brief    Allocate the G11 flavours of the MHW parameter structures
//! \details  Assigns freshly allocated G11 slice state, pipe mode select and pipe
//!           buffer address parameter objects to the corresponding members.
//!           The allocation results are not checked here.
//!
void CodechalEncHevcStateG11::CreateMhwParams()
{
    m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G11);
    m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G11);
    m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G11);
}
7831
CalculatePictureStateCommandSize()7832 MOS_STATUS CodechalEncHevcStateG11::CalculatePictureStateCommandSize()
7833 {
7834 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7835
7836 CODECHAL_ENCODE_FUNCTION_ENTER;
7837
7838 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
7839 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7840 m_hwInterface->GetHxxStateCommandSize(
7841 CODECHAL_ENCODE_MODE_HEVC,
7842 &m_defaultPictureStatesSize,
7843 &m_defaultPicturePatchListSize,
7844 &stateCmdSizeParams));
7845
7846 return eStatus;
7847 }
7848
AddHcpPipeBufAddrCmd(PMOS_COMMAND_BUFFER cmdBuffer)7849 MOS_STATUS CodechalEncHevcStateG11::AddHcpPipeBufAddrCmd(
7850 PMOS_COMMAND_BUFFER cmdBuffer)
7851 {
7852 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7853
7854 CODECHAL_ENCODE_FUNCTION_ENTER;
7855
7856 *m_pipeBufAddrParams = {};
7857 SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
7858 #ifdef _MMC_SUPPORTED
7859 m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
7860 #endif
7861 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams));
7862
7863 return eStatus;
7864 }
7865
SetGpuCtxCreatOption()7866 MOS_STATUS CodechalEncHevcStateG11::SetGpuCtxCreatOption()
7867 {
7868 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7869
7870 CODECHAL_ENCODE_FUNCTION_ENTER;
7871
7872 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
7873 {
7874 CodechalEncoderState::SetGpuCtxCreatOption();
7875 }
7876 else
7877 {
7878 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
7879 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
7880
7881 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
7882 m_scalabilityState,
7883 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
7884 }
7885
7886 return eStatus;
7887 }
7888
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 * tileCodingParams,uint32_t bitstreamBufSize)7889 MOS_STATUS CodechalEncHevcStateG11::SetTileData(
7890 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G11* tileCodingParams,
7891 uint32_t bitstreamBufSize)
7892 {
7893 CODECHAL_ENCODE_FUNCTION_ENTER;
7894
7895 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7896
7897 if (!m_hevcPicParams->tiles_enabled_flag)
7898 {
7899 return eStatus;
7900 }
7901
7902 uint32_t colBd[100] = { 0 };
7903 uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7904 for (uint32_t i = 0; i < num_tile_columns; i++)
7905 {
7906 colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
7907 }
7908
7909 uint32_t rowBd[100] = { 0 };
7910 uint32_t num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7911 for (uint32_t i = 0; i < num_tile_rows; i++)
7912 {
7913 rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
7914 }
7915
7916 m_numTiles = num_tile_rows * num_tile_columns;
7917
7918 uint32_t const uiNumCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
7919 uint32_t numCuRecord = uiNumCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
7920 uint32_t bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
7921 int32_t frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
7922 int32_t frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
7923 int32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7924 uint32_t NumLCUInPic = 0;
7925
7926 for (uint32_t i = 0; i < num_tile_rows; i++)
7927 {
7928 for (uint32_t j = 0; j < num_tile_columns; j++)
7929 {
7930 NumLCUInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7931 }
7932 }
7933
7934 uint32_t numSliceInTile = 0;
7935 for (uint32_t uiNumLCUsInTiles = 0, i = 0; i < num_tile_rows; i++)
7936 {
7937 for (uint32_t j = 0; j < num_tile_columns; j++)
7938 {
7939 uint32_t idx = i * num_tile_columns + j;
7940 uint32_t numLCUInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7941
7942 tileCodingParams[idx].TileStartLCUX = colBd[j];
7943 tileCodingParams[idx].TileStartLCUY = rowBd[i];
7944
7945 tileCodingParams[idx].TileColumnStoreSelect = j % 2;
7946 tileCodingParams[idx].TileRowStoreSelect = i % 2;
7947
7948 if (j != num_tile_columns - 1)
7949 {
7950 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
7951 tileCodingParams[idx].IsLastTileofRow = false;
7952 }
7953 else
7954 {
7955 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
7956 tileCodingParams[idx].IsLastTileofRow = true;
7957
7958 }
7959
7960 if (i != num_tile_rows - 1)
7961 {
7962 tileCodingParams[idx].IsLastTileofColumn = false;
7963 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
7964 }
7965 else
7966 {
7967 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
7968 tileCodingParams[idx].IsLastTileofColumn = true;
7969 }
7970
7971 tileCodingParams[idx].NumOfTilesInFrame = m_numTiles;
7972 tileCodingParams[idx].NumOfTileColumnsInFrame = num_tile_columns;
7973 tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * uiNumLCUsInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
7974 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7975 tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1;
7976
7977 tileCodingParams[idx].PakTileStatisticsOffset = m_sizeOfHcpPakFrameStats * idx / CODECHAL_CACHELINE_SIZE;
7978 tileCodingParams[idx].TileSizeStreamoutOffset = idx;
7979 tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
7980 tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource;
7981 tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
7982 tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
7983 tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
7984 tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
7985 tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
7986
7987 cuLevelStreamoutOffset += MOS_ALIGN_CEIL((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7988 sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
7989 saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
7990 uint64_t totalSizeTemp = (uint64_t)bitstreamBufSize * (uint64_t)numLCUInTile;
7991 uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)NumLCUInPic) + ((totalSizeTemp % (uint64_t)NumLCUInPic) ? 1 : 0);
7992 bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7993 uiNumLCUsInTiles += numLCUInTile;
7994
7995 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
7996 {
7997 bool lastSliceInTile = false, sliceInTile = false;
7998 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
7999 &tileCodingParams[idx],
8000 &sliceInTile,
8001 &lastSliceInTile));
8002 numSliceInTile += (sliceInTile ? 1 : 0);
8003 }
8004 }
8005 // same row store buffer for different tile rows.
8006 saoRowstoreOffset = 0;
8007 sseRowstoreOffset = 0;
8008 }
8009
8010 return eStatus;
8011 }
8012
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile,bool * sliceInTile,bool * lastSliceInTile)8013 MOS_STATUS CodechalEncHevcStateG11::IsSliceInTile(
8014 uint32_t sliceNumber,
8015 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G11 currentTile,
8016 bool *sliceInTile,
8017 bool *lastSliceInTile)
8018 {
8019 CODECHAL_ENCODE_FUNCTION_ENTER;
8020
8021 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8022
8023 CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
8024 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
8025 CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
8026
8027 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
8028 uint32_t residual = (1 << shift) - 1;
8029 uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
8030 uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
8031
8032 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
8033 uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
8034 uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
8035 uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
8036
8037 uint32_t tile_column_width = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
8038 uint32_t tile_row_height = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
8039 if (sliceLCUx < currentTile->TileStartLCUX ||
8040 sliceLCUy < currentTile->TileStartLCUY ||
8041 sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
8042 sliceLCUy >= currentTile->TileStartLCUY + tile_row_height
8043 )
8044 {
8045 // slice start is not in the tile boundary
8046 *lastSliceInTile = *sliceInTile = false;
8047 return eStatus;
8048 }
8049
8050 sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tile_column_width;
8051 sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tile_column_width;
8052
8053 if (sliceLCUx >= currentTile->TileStartLCUX + tile_column_width)
8054 {
8055 sliceLCUx -= tile_column_width;
8056 sliceLCUy++;
8057 }
8058
8059 if (sliceLCUx < currentTile->TileStartLCUX ||
8060 sliceLCUy < currentTile->TileStartLCUY ||
8061 sliceLCUx >= currentTile->TileStartLCUX + tile_column_width ||
8062 sliceLCUy >= currentTile->TileStartLCUY + tile_row_height
8063 )
8064 {
8065 // last LCU of the slice is out of the tile boundary
8066 *lastSliceInTile = *sliceInTile = false;
8067 return eStatus;
8068 }
8069
8070 *sliceInTile = true;
8071
8072 sliceLCUx++;
8073 sliceLCUy++;
8074
8075 // the end of slice is at the boundary of tile
8076 *lastSliceInTile = (
8077 sliceLCUx == currentTile->TileStartLCUX + tile_column_width &&
8078 sliceLCUy == currentTile->TileStartLCUY + tile_row_height);
8079
8080 return eStatus;
8081 }
8082
8083 #if USE_CODECHAL_DEBUG_TOOL
8084
8085 //MOS_STATUS CodechalEncHevcStateG11::CodecHal_DbgDumpHEVCMbEncCurbeG11(
8086 // CodechalDebugInterface *pDebugInterface,
8087 // CODECHAL_MEDIA_STATE_TYPE Function,
8088 // PMOS_RESOURCE presDBuffer)
8089 //{
8090
8091 //#define WRITE_CURBE_FIELD_TO_FILE(field) {\
8092 // oss << "field = " << +pCurbeData->field << std::endl;}
8093 //
8094 // PMOS_INTERFACE m_osInterface = nullptr;
8095 // MOS_LOCK_PARAMS LockFlags;
8096 // CodechalEncHevcStateG11::MBENC_COMBINED_BUFFER1 *pEncComBuf1 = nullptr;
8097 //
8098 // CODECHAL_DEBUG_FUNCTION_ENTER;
8099 //
8100 // CODECHAL_DEBUG_CHK_NULL(pDebugInterface);
8101 // CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pOsInterface);
8102 // CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pHwInterface);
8103 // m_osInterface = pDebugInterface->pOsInterface;
8104 //
8105 // if (!pDebugInterface->DumpIsEnabled(CodechalDbgAttr::attrCurbe))
8106 // {
8107 // return MOS_STATUS_SUCCESS;
8108 // }
8109 //
8110 // MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
8111 // LockFlags.ReadOnly = 1;
8112 //
8113 // pEncComBuf1 = (CodechalEncHevcStateG11::MBENC_COMBINED_BUFFER1*)m_osInterface->pfnLockResource(
8114 // m_osInterface,
8115 // presDBuffer,
8116 // &LockFlags);
8117 //
8118 // CodechalEncHevcStateG11::MBENC_CURBE* pCurbeData = &pEncComBuf1->Curbe;
8119 //
8120 // std::ostringstream oss;
8121 // oss.setf(std::ios::showbase | std::ios::uppercase);
8122 //
8123 // oss << "# CURBE Parameters:" << std::endl;
8124 //
8125 // WRITE_CURBE_FIELD_TO_FILE(FrameWidthInSamples);
8126 // WRITE_CURBE_FIELD_TO_FILE(FrameHeightInSamples);
8127 //
8128 // WRITE_CURBE_FIELD_TO_FILE(Log2MaxCUSize);
8129 // WRITE_CURBE_FIELD_TO_FILE(Log2MinCUSize);
8130 // WRITE_CURBE_FIELD_TO_FILE(Log2MaxTUSize);
8131 // WRITE_CURBE_FIELD_TO_FILE(Log2MinTUSize);
8132 // WRITE_CURBE_FIELD_TO_FILE(MaxIntraRdeIter);
8133 // WRITE_CURBE_FIELD_TO_FILE(QPType);
8134 // WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthInter);
8135 // WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthIntra);
8136 // WRITE_CURBE_FIELD_TO_FILE(Log2ParallelMergeLevel);
8137 //
8138 // WRITE_CURBE_FIELD_TO_FILE(CornerNeighborPixel);
8139 // WRITE_CURBE_FIELD_TO_FILE(IntraNeighborAvailFlags);
8140 // WRITE_CURBE_FIELD_TO_FILE(ChromaFormatType);
8141 // WRITE_CURBE_FIELD_TO_FILE(SubPelMode);
8142 // WRITE_CURBE_FIELD_TO_FILE(InterSADMeasure);
8143 // WRITE_CURBE_FIELD_TO_FILE(IntraSADMeasure);
8144 // WRITE_CURBE_FIELD_TO_FILE(IntraPrediction);
8145 // WRITE_CURBE_FIELD_TO_FILE(RefIDCostMode);
8146 // WRITE_CURBE_FIELD_TO_FILE(TUBasedCostSetting);
8147 //
8148 // WRITE_CURBE_FIELD_TO_FILE(ExplictModeEn);
8149 // WRITE_CURBE_FIELD_TO_FILE(AdaptiveEn);
8150 // WRITE_CURBE_FIELD_TO_FILE(EarlyImeSuccessEn);
8151 // WRITE_CURBE_FIELD_TO_FILE(IntraSpeedMode);
8152 // WRITE_CURBE_FIELD_TO_FILE(IMECostCentersSel);
8153 // WRITE_CURBE_FIELD_TO_FILE(RDEQuantRoundValue);
8154 // WRITE_CURBE_FIELD_TO_FILE(IMERefWindowSize);
8155 // WRITE_CURBE_FIELD_TO_FILE(IntraComputeType);
8156 // WRITE_CURBE_FIELD_TO_FILE(Depth0IntraPredition);
8157 // WRITE_CURBE_FIELD_TO_FILE(TUDepthControl);
8158 // WRITE_CURBE_FIELD_TO_FILE(IntraTuRecFeedbackDisable);
8159 // WRITE_CURBE_FIELD_TO_FILE(MergeListBiDisable);
8160 // WRITE_CURBE_FIELD_TO_FILE(EarlyImeStop);
8161 //
8162 // WRITE_CURBE_FIELD_TO_FILE(FrameQP);
8163 // WRITE_CURBE_FIELD_TO_FILE(FrameQPSign);
8164 // WRITE_CURBE_FIELD_TO_FILE(ConcurrentGroupNum);
8165 // WRITE_CURBE_FIELD_TO_FILE(NumofUnitInWaveFront);
8166 //
8167 // WRITE_CURBE_FIELD_TO_FILE(LoadBalenceEnable);
8168 // WRITE_CURBE_FIELD_TO_FILE(NumberofMultiFrame);
8169 // WRITE_CURBE_FIELD_TO_FILE(Degree45);
8170 // WRITE_CURBE_FIELD_TO_FILE(Break12Dependency);
8171 // WRITE_CURBE_FIELD_TO_FILE(ThreadNumber);
8172 //
8173 // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_B);
8174 // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_P);
8175 // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_I);
8176 //
8177 // WRITE_CURBE_FIELD_TO_FILE(NumofRowTile);
8178 // WRITE_CURBE_FIELD_TO_FILE(NumofColumnTile);
8179 //
8180 // WRITE_CURBE_FIELD_TO_FILE(TransquantBypassEnableFlag);
8181 // WRITE_CURBE_FIELD_TO_FILE(PCMEnabledFlag);
8182 // WRITE_CURBE_FIELD_TO_FILE(CuQpDeltaEnabledFlag);
8183 // WRITE_CURBE_FIELD_TO_FILE(Stepping);
8184 // WRITE_CURBE_FIELD_TO_FILE(WaveFrontSplitsEnable);
8185 // WRITE_CURBE_FIELD_TO_FILE(HMEFlag);
8186 // WRITE_CURBE_FIELD_TO_FILE(SuperHME);
8187 // WRITE_CURBE_FIELD_TO_FILE(UltraHME);
8188 // WRITE_CURBE_FIELD_TO_FILE(Cu64SkipCheckOnly);
8189 // WRITE_CURBE_FIELD_TO_FILE(EnableCu64Check);
8190 // WRITE_CURBE_FIELD_TO_FILE(Cu642Nx2NCheckOnly);
8191 // WRITE_CURBE_FIELD_TO_FILE(EnableCu64AmpCheck);
8192 // WRITE_CURBE_FIELD_TO_FILE(DisablePIntra);
8193 // WRITE_CURBE_FIELD_TO_FILE(DisableIntraTURec);
8194 // WRITE_CURBE_FIELD_TO_FILE(InheritIntraModeFromTU0);
8195 // WRITE_CURBE_FIELD_TO_FILE(CostScalingForRA);
8196 // WRITE_CURBE_FIELD_TO_FILE(DisableIntraNxN);
8197 //
8198 // WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL0);
8199 // WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL1);
8200 // WRITE_CURBE_FIELD_TO_FILE(MaxBRefIdxL0);
8201 //
8202 // WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermination);
8203 // WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermSize);
8204 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Enable);
8205 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Order);
8206 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Th);
8207 // WRITE_CURBE_FIELD_TO_FILE(DynamicOrderTh);
8208 // WRITE_CURBE_FIELD_TO_FILE(PerBFrameQPOffset);
8209 // WRITE_CURBE_FIELD_TO_FILE(IncreaseExitThresh);
8210 // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Min32);
8211 // WRITE_CURBE_FIELD_TO_FILE(LastFrameIsIntra);
8212 //
8213 // WRITE_CURBE_FIELD_TO_FILE(LenSP);
8214 // WRITE_CURBE_FIELD_TO_FILE(MaxNumSU);
8215 //
8216 // WRITE_CURBE_FIELD_TO_FILE(CostTableIndex);
8217 //
8218 // WRITE_CURBE_FIELD_TO_FILE(SliceType);
8219 // WRITE_CURBE_FIELD_TO_FILE(TemporalMvpEnableFlag);
8220 // WRITE_CURBE_FIELD_TO_FILE(CollocatedFromL0Flag);
8221 // WRITE_CURBE_FIELD_TO_FILE(theSameRefList);
8222 // WRITE_CURBE_FIELD_TO_FILE(IsLowDelay);
8223 // WRITE_CURBE_FIELD_TO_FILE(MaxNumMergeCand);
8224 // WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL0);
8225 // WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL1);
8226 //
8227 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_0);
8228 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_0);
8229 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_1);
8230 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_1);
8231 //
8232 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_2);
8233 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_2);
8234 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_3);
8235 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_3);
8236 //
8237 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_4);
8238 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_4);
8239 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_5);
8240 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_5);
8241 //
8242 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_6);
8243 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_6);
8244 // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_7);
8245 // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_7);
8246 //
8247 // WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L0);
8248 // WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L1);
8249 //
8250 // WRITE_CURBE_FIELD_TO_FILE(RefFrameWinWidth);
8251 // WRITE_CURBE_FIELD_TO_FILE(RefFrameWinHeight);
8252 //
8253 // WRITE_CURBE_FIELD_TO_FILE(RoundingInter);
8254 // WRITE_CURBE_FIELD_TO_FILE(RoundingIntra);
8255 // WRITE_CURBE_FIELD_TO_FILE(MaxThreadWidth);
8256 // WRITE_CURBE_FIELD_TO_FILE(MaxThreadHeight);
8257 //
8258 // const char *fileName = pDebugInterface->CreateFileName(
8259 // "_HEVCMBEnc",
8260 // CodechalDbgBufferType::bufCurbe,
8261 // CodechalDbgExtType::txt);
8262 //
8263 // std::ofstream ofs(fileName, std::ios::out);
8264 // ofs << oss.str();
8265 // ofs.close();
8266 //
8267 // if (m_osInterface && pEncComBuf1)
8268 // {
8269 // m_osInterface->pfnUnlockResource(
8270 // m_osInterface,
8271 // presDBuffer);
8272 // }
8273 //
8274 // return MOS_STATUS_SUCCESS;
8275 //}
8276
8277 #endif
VerifyCommandBufferSize()8278 MOS_STATUS CodechalEncHevcStateG11::VerifyCommandBufferSize()
8279 {
8280 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8281
8282 CODECHAL_ENCODE_FUNCTION_ENTER;
8283
8284 if (UseRenderCommandBuffer() || m_numPipe == 1)
8285 {
8286 // legacy mode & resize CommandBuffer Size for every BRC pass
8287 if (!m_singleTaskPhaseSupported)
8288 {
8289 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
8290 }
8291 return eStatus;
8292 }
8293
8294 // virtual engine
8295 uint32_t requestedSize =
8296 m_pictureStatesSize +
8297 m_extraPictureStatesSize +
8298 (m_sliceStatesSize * m_numSlices);
8299
8300 requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
8301
8302 // Running in the multiple VDBOX mode
8303 int currentPipe = GetCurrentPipe();
8304 if (currentPipe < 0 || currentPipe >= m_numPipe)
8305 {
8306 eStatus = MOS_STATUS_INVALID_PARAMETER;
8307 return eStatus;
8308 }
8309 int currentPass = GetCurrentPass();
8310 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8311 {
8312 eStatus = MOS_STATUS_INVALID_PARAMETER;
8313 return eStatus;
8314 }
8315
8316 if (IsFirstPipe() && m_osInterface->bUsesPatchList)
8317 {
8318 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
8319 }
8320
8321 PMOS_COMMAND_BUFFER cmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
8322
8323 if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
8324 m_sizeOfVeBatchBuffer < requestedSize)
8325 {
8326 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8327
8328 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8329 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8330 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8331 allocParamsForBufferLinear.Format = Format_Buffer;
8332 allocParamsForBufferLinear.dwBytes = requestedSize;
8333 allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
8334
8335 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
8336 {
8337 if (cmdBuffer->pCmdBase)
8338 {
8339 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
8340 }
8341 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
8342 }
8343
8344 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8345 m_osInterface,
8346 &allocParamsForBufferLinear,
8347 &cmdBuffer->OsResource));
8348
8349 m_sizeOfVeBatchBuffer = requestedSize;
8350 }
8351
8352 if (cmdBuffer->pCmdBase == nullptr)
8353 {
8354 MOS_LOCK_PARAMS lockParams;
8355 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
8356 lockParams.WriteOnly = true;
8357 cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
8358 cmdBuffer->iRemaining = m_sizeOfVeBatchBuffer;
8359 cmdBuffer->iOffset = 0;
8360
8361 if (cmdBuffer->pCmdBase == nullptr)
8362 {
8363 eStatus = MOS_STATUS_NULL_POINTER;
8364 return eStatus;
8365 }
8366 }
8367
8368 return eStatus;
8369 }
8370
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)8371 MOS_STATUS CodechalEncHevcStateG11::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
8372 {
8373 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8374
8375 CODECHAL_ENCODE_FUNCTION_ENTER;
8376
8377 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8378 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
8379
8380 if (UseRenderCommandBuffer() || m_numPipe == 1)
8381 {
8382 // legacy mode
8383 m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
8384 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
8385 return eStatus;
8386 }
8387
8388 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
8389
8390 int currentPipe = GetCurrentPipe();
8391 if (currentPipe < 0 || currentPipe >= m_numPipe)
8392 {
8393 eStatus = MOS_STATUS_INVALID_PARAMETER;
8394 return eStatus;
8395 }
8396 int currentPass = GetCurrentPass();
8397 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8398 {
8399 eStatus = MOS_STATUS_INVALID_PARAMETER;
8400 return eStatus;
8401 }
8402
8403 *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
8404
8405 if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
8406 {
8407 // Insert CP Prolog
8408 CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
8409 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
8410 }
8411 return eStatus;
8412 }
8413
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)8414 MOS_STATUS CodechalEncHevcStateG11::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
8415 {
8416 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8417
8418 CODECHAL_ENCODE_FUNCTION_ENTER;
8419
8420 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8421
8422 if (UseRenderCommandBuffer() || m_numPipe == 1)
8423 {
8424 // legacy mode
8425 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
8426 return eStatus;
8427 }
8428
8429 int currentPipe = GetCurrentPipe();
8430 if (currentPipe < 0 || currentPipe >= m_numPipe)
8431 {
8432 eStatus = MOS_STATUS_INVALID_PARAMETER;
8433 return eStatus;
8434 }
8435 int currentPass = GetCurrentPass();
8436 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8437 {
8438 eStatus = MOS_STATUS_INVALID_PARAMETER;
8439 return eStatus;
8440 }
8441 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
8442 m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
8443 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
8444
8445 return eStatus;
8446 }
8447
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)8448 MOS_STATUS CodechalEncHevcStateG11::SubmitCommandBuffer(
8449 PMOS_COMMAND_BUFFER cmdBuffer,
8450 bool bNullRendering)
8451 {
8452 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8453
8454 CODECHAL_ENCODE_FUNCTION_ENTER;
8455
8456 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8457
8458 if (UseRenderCommandBuffer() || m_numPipe == 1)
8459 {
8460 // legacy mode
8461 if (!UseRenderCommandBuffer()) // Set VE Hints for video contexts only
8462 {
8463 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
8464 }
8465 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
8466 return eStatus;
8467 }
8468
8469 bool cmdBufferReadyForSubmit = IsLastPipe();
8470
8471 // In STF, Hold the command buffer submission till last pass
8472 if (m_singleTaskPhaseSupported)
8473 {
8474 cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
8475 }
8476
8477 if(!cmdBufferReadyForSubmit)
8478 {
8479 return eStatus;
8480 }
8481
8482 int currentPass = GetCurrentPass();
8483 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8484 {
8485 eStatus = MOS_STATUS_INVALID_PARAMETER;
8486 return eStatus;
8487 }
8488 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
8489
8490 for (uint32_t i = 0; i < m_numPipe; i++)
8491 {
8492 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
8493
8494 if(cmdBuffer->pCmdBase)
8495 {
8496 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
8497 }
8498
8499 cmdBuffer->pCmdBase = 0;
8500 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
8501 }
8502 m_sizeOfVeBatchBuffer = 0;
8503
8504 if(eStatus == MOS_STATUS_SUCCESS)
8505 {
8506 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
8507 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
8508 }
8509
8510 return eStatus;
8511 }
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)8512 MOS_STATUS CodechalEncHevcStateG11::SendPrologWithFrameTracking(
8513 PMOS_COMMAND_BUFFER cmdBuffer,
8514 bool frameTrackingRequested,
8515 MHW_MI_MMIOREGISTERS *mmioRegister)
8516 {
8517 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8518
8519 CODECHAL_ENCODE_FUNCTION_ENTER;
8520
8521 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8522
8523 if (UseRenderCommandBuffer())
8524 {
8525 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
8526 return eStatus;
8527 }
8528
8529 if (!IsLastPipe())
8530 {
8531 return eStatus;
8532 }
8533
8534 PMOS_COMMAND_BUFFER commandBufferInUse;
8535 if (m_realCmdBuffer.pCmdBase)
8536 {
8537 commandBufferInUse = &m_realCmdBuffer;
8538 }
8539 else
8540 {
8541 if (cmdBuffer && cmdBuffer->pCmdBase)
8542 {
8543 commandBufferInUse = cmdBuffer;
8544 }
8545 else
8546 {
8547 eStatus = MOS_STATUS_INVALID_PARAMETER;
8548 return eStatus;
8549 }
8550 }
8551 // initialize command buffer attributes
8552 commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
8553 commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
8554 commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
8555 commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
8556 commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
8557
8558 if (frameTrackingRequested && m_frameTrackingEnabled)
8559 {
8560 commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
8561 commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
8562 &m_encodeStatusBuf.resStatusBuffer;
8563 commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
8564 // Set media frame tracking address offset(the offset from the encoder status buffer page)
8565 commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
8566 }
8567
8568 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
8569 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
8570 genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
8571 genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
8572 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
8573 genericPrologParams.dwStoreDataValue = m_storeData - 1;
8574
8575 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
8576
8577 return eStatus;
8578 }
8579
SetSliceStructs()8580 MOS_STATUS CodechalEncHevcStateG11::SetSliceStructs()
8581 {
8582 CODECHAL_ENCODE_FUNCTION_ENTER;
8583 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8584 eStatus = CodechalEncodeHevcBase::SetSliceStructs();
8585 m_numPassesInOnePipe = m_numPasses;
8586 m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
8587 return eStatus;
8588 }
8589
AllocateTileStatistics()8590 MOS_STATUS CodechalEncHevcStateG11::AllocateTileStatistics()
8591 {
8592 CODECHAL_ENCODE_FUNCTION_ENTER;
8593
8594 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8595
8596 if (!m_hevcPicParams->tiles_enabled_flag)
8597 {
8598 return eStatus;
8599 }
8600
8601 auto num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
8602 auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
8603 auto num_tiles = num_tile_rows*num_tile_columns;
8604
8605 MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
8606 MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
8607 MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
8608
8609 MOS_LOCK_PARAMS lockFlagsWriteOnly;
8610 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8611 lockFlagsWriteOnly.WriteOnly = true;
8612
8613 // Set the maximum size based on frame level statistics.
8614 m_hevcStatsSize.uiTileSizeRecord = CODECHAL_CACHELINE_SIZE;
8615 m_hevcStatsSize.uiHevcPakStatistics = m_sizeOfHcpPakFrameStats;
8616 m_hevcStatsSize.uiVdencStatistics = 0;
8617 m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
8618
8619 // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
8620 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
8621 m_hevcFrameStatsOffset.uiTileSizeRecord = 0; // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
8622 m_hevcFrameStatsOffset.uiHevcPakStatistics = 0;
8623 m_hevcFrameStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
8624 m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
8625
8626 // Frame level statistics
8627 m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6), CODECHAL_PAGE_SIZE);
8628
8629 // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
8630 if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
8631 {
8632 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8633 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8634 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8635 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8636 allocParamsForBufferLinear.Format = Format_Buffer;
8637 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
8638 allocParamsForBufferLinear.pBufName = "HCP Aggregated Frame Statistics Streamout Buffer";
8639
8640 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8641 m_osInterface,
8642 &allocParamsForBufferLinear,
8643 &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
8644 m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
8645
8646 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8647 m_osInterface,
8648 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
8649 &lockFlagsWriteOnly);
8650
8651 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8652 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8653 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
8654 }
8655
8656 // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
8657 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
8658 m_hevcTileStatsOffset.uiTileSizeRecord = 0; // TileReord is in a separated resource
8659 m_hevcTileStatsOffset.uiHevcPakStatistics = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer
8660 m_hevcTileStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
8661 m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
8662 // Combined statistics size for all tiles
8663 m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6, CODECHAL_PAGE_SIZE);
8664
8665 // Tile size record size for all tiles
8666 m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
8667
8668 if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
8669 {
8670 if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
8671 {
8672 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
8673 }
8674 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8675 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8676 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8677 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8678 allocParamsForBufferLinear.Format = Format_Buffer;
8679 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
8680 allocParamsForBufferLinear.pBufName = "HCP Tile Level Statistics Streamout Buffer";
8681
8682 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8683 m_osInterface,
8684 &allocParamsForBufferLinear,
8685 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
8686 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
8687
8688 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8689 m_osInterface,
8690 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
8691 &lockFlagsWriteOnly);
8692 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8693
8694 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8695 m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
8696 }
8697
8698 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
8699 {
8700 if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
8701 {
8702 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
8703 }
8704 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8705 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8706 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8707 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8708 allocParamsForBufferLinear.Format = Format_Buffer;
8709 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
8710 allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
8711
8712 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
8713 m_osInterface,
8714 &allocParamsForBufferLinear,
8715 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource));
8716 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize;
8717
8718 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
8719 m_osInterface,
8720 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
8721 &lockFlagsWriteOnly);
8722 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8723
8724 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8725 m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
8726 }
8727
8728 return eStatus;
8729 }
8730
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)8731 void CodechalEncHevcStateG11::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
8732 {
8733 CODECHAL_ENCODE_FUNCTION_ENTER;
8734
8735 CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
8736
8737 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
8738 if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
8739 {
8740 pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource;
8741 pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
8742 pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
8743 pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
8744 }
8745 }
8746
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)8747 MOS_STATUS CodechalEncHevcStateG11::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
8748 {
8749 CODECHAL_ENCODE_FUNCTION_ENTER;
8750
8751 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8752
8753 if (!m_sseEnabled)
8754 {
8755 return eStatus;
8756 }
8757
8758 // encodeStatus is offset by 2 DWs in the resource
8759 uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
8760 for (auto i = 0; i < 6; i++) // 64 bit SSE values for luma/ chroma channels need to be copied
8761 {
8762 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
8763 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
8764 miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
8765 miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
8766 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
8767 miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
8768 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
8769 }
8770
8771 return eStatus;
8772 }
8773
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)8774 void CodechalEncHevcStateG11::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
8775 {
8776 PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBbIndex];
8777 bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
8778
8779 MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
8780 indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
8781 indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
8782 indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
8783 indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
8784 indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
8785 indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
8786 indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
8787 indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
8788 indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
8789 }
8790
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)8791 MOS_STATUS CodechalEncHevcStateG11::UpdateCmdBufAttribute(
8792 PMOS_COMMAND_BUFFER cmdBuffer,
8793 bool renderEngineInUse)
8794 {
8795 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8796
8797 // should not be there. Will remove it in the next change
8798 CODECHAL_ENCODE_FUNCTION_ENTER;
8799 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
8800 {
8801 PMOS_CMD_BUF_ATTRI_VE attriExt =
8802 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
8803
8804 memset((void *)attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
8805 attriExt->bUseVirtualEngineHint =
8806 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
8807 }
8808
8809 return eStatus;
8810 }
8811
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)8812 MOS_STATUS CodechalEncHevcStateG11::SetAndPopulateVEHintParams(
8813 PMOS_COMMAND_BUFFER cmdBuffer)
8814 {
8815 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8816
8817 CODECHAL_ENCODE_FUNCTION_ENTER;
8818
8819 if (!MOS_VE_SUPPORTED(m_osInterface))
8820 {
8821 return eStatus;
8822 }
8823
8824 CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
8825 MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
8826
8827 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
8828 {
8829 scalSetParms.bNeedSyncWithPrevious = true;
8830 }
8831
8832 int32_t currentPass = GetCurrentPass();
8833 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
8834 {
8835 eStatus = MOS_STATUS_INVALID_PARAMETER;
8836 return eStatus;
8837 }
8838 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
8839 if (m_numPipe >= 2)
8840 {
8841 for (auto i = 0; i < m_numPipe; i++)
8842 {
8843 scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
8844 }
8845 }
8846
8847 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
8848 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
8849 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
8850
8851 return eStatus;
8852 }
8853
8854 #if USE_CODECHAL_DEBUG_TOOL
DumpFrameStatsBuffer(CodechalDebugInterface * debugInterface)8855 MOS_STATUS CodechalEncHevcStateG11::DumpFrameStatsBuffer(CodechalDebugInterface* debugInterface)
8856 {
8857 CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface);
8858
8859 PMOS_RESOURCE resBuffer = &m_resFrameStatStreamOutBuffer;
8860 uint32_t offset = 0;
8861 uint32_t num_tiles = 1;
8862 //In scalable mode, HEVC PAK Frame Statistics gets dumped out for each tile
8863 if ( m_numPipe > 1)
8864 {
8865 resBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
8866 offset = m_hevcTileStatsOffset.uiHevcPakStatistics;
8867 num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
8868 }
8869 uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * num_tiles, CODECHAL_CACHELINE_SIZE);
8870
8871 CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
8872 resBuffer,
8873 CodechalDbgAttr::attrFrameState,
8874 "FrameStatus",
8875 size,
8876 offset,
8877 CODECHAL_NUM_MEDIA_STATES));
8878
8879 return MOS_STATUS_SUCCESS;
8880 }
8881
DumpPakOutput()8882 MOS_STATUS CodechalEncHevcStateG11::DumpPakOutput()
8883 {
8884 std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
8885
8886 CODECHAL_DEBUG_TOOL(
8887 int32_t currentPass = GetCurrentPass();
8888 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8889 &m_resPakcuLevelStreamoutData.sResource,
8890 CodechalDbgAttr::attrCUStreamout,
8891 currPassName.data(),
8892 m_resPakcuLevelStreamoutData.dwSize,
8893 0,
8894 CODECHAL_NUM_MEDIA_STATES));
8895 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8896 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
8897 CodechalDbgAttr::attrTileBasedStats,
8898 currPassName.data(),
8899 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
8900 0,
8901 CODECHAL_NUM_MEDIA_STATES));
8902 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8903 &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite],
8904 CodechalDbgAttr::attrBrcPakStats,
8905 currPassName.data(),
8906 m_hevcBrcPakStatisticsSize,
8907 0,
8908 CODECHAL_NUM_MEDIA_STATES));
8909 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8910 &m_HucStitchCmdBatchBuffer.OsResource,
8911 CodechalDbgAttr::attr2ndLvlBatchMfx,
8912 currPassName.data(),
8913 m_hwInterface->m_HucStitchCmdBatchBufferSize,
8914 0,
8915 CODECHAL_NUM_MEDIA_STATES));
8916 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8917 &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass],
8918 CodechalDbgAttr::attrHuCStitchDataBuf,
8919 currPassName.data(),
8920 sizeof(HucCommandData),
8921 0,
8922 CODECHAL_NUM_MEDIA_STATES));
8923 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
8924 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
8925 sizeof(HucPakStitchDmemEncG11),
8926 currentPass,
8927 hucRegionDumpPakIntegrate));
8928 )
8929
8930 return MOS_STATUS_SUCCESS;
8931 }
8932 #endif
8933
EncodeMeKernel()8934 MOS_STATUS CodechalEncHevcStateG11::EncodeMeKernel()
8935 {
8936 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8937
8938 CODECHAL_ENCODE_FUNCTION_ENTER;
8939
8940 // Walker must be used for HME call and scaling one
8941 CODECHAL_ENCODE_ASSERT(m_hwWalker);
8942
8943 if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled())
8944 {
8945 CodechalKernelHme::CurbeParam curbeParam;
8946 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbeParams(curbeParam));
8947
8948 CodechalKernelHme::SurfaceParams surfaceParam;
8949 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeSurfaceParams(surfaceParam));
8950
8951 m_hmeKernel->setnoMEKernelForPFrame(m_lowDelay);
8952
8953 if (m_hmeKernel->Is16xMeEnabled())
8954 {
8955 if (m_hmeKernel->Is32xMeEnabled())
8956 {
8957 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb32x;
8958 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb32x;
8959 surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
8960 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x));
8961 }
8962 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb16x;
8963 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
8964 surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
8965 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x));
8966 }
8967 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb4x;
8968 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb4x;
8969 surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset;
8970 curbeParam.brcEnable = m_brcEnabled;
8971 curbeParam.sumMVThreshold = m_sumMVThreshold;
8972 surfaceParam.meSumMvandDistortionBuffer = m_mvAndDistortionSumSurface;
8973 m_lastTaskInPhase = true;
8974
8975 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x));
8976 }
8977
8978 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::DumpHMESurfaces());
8979
8980 return eStatus;
8981 }
8982