1 /*
2 * Copyright (c) 2017-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_hevc_mbenc_g12.cpp
24 //! \brief HEVC dual-pipe encoder mbenc kernels for GEN12.
25 //!
26
27 #include "codechal_encode_hevc_mbenc_g12.h"
28 #include "codechal_encode_hevc_brc_g12.h"
29 #include "mhw_vdbox_hcp_g12_X.h"
30 #include "codechal_kernel_hme_mdf_g12.h"
31 #include "codechal_kernel_header_g12.h"
32 #include "Gen12_HEVC_B_LCU32.h"
33 #include "Gen12_HEVC_B_LCU64.h"
34 #include "cm_wrapper.h"
35
36 #include "Gen12_HEVC_BRC_INIT.h"
37 #include "Gen12_HEVC_BRC_RESET.h"
38 #include "Gen12_HEVC_BRC_UPDATE.h"
39 #include "Gen12_HEVC_BRC_LCUQP.h"
40 #include "Gen12LP_CoarseIntra_genx.h"
41 #include "Gen12LP_WeightedPrediction_genx.h"
42
43 #if USE_PROPRIETARY_CODE
44 #include "cm_device_rt.h"
45 #endif
46
47 #if MOS_MEDIASOLO_SUPPORTED
48 #include "mos_os_solo.h"
49 #endif // (_DEBUG || _RELEASE_INTERNAL)
50
CodecHalHevcMbencG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)51 CodecHalHevcMbencG12::CodecHalHevcMbencG12(CodechalHwInterface* hwInterface,
52 CodechalDebugInterface* debugInterface,
53 PCODECHAL_STANDARD_INFO standardInfo)
54 : CodechalEncHevcStateG12(hwInterface, debugInterface, standardInfo)
55 {
56 m_useMdf = true;
57 for (int32_t idx = 0; idx < MAX_VME_FWD_REF + MAX_VME_BWD_REF; idx++)
58 {
59 m_surfRefArray[idx] = nullptr;
60 m_surf2XArray[idx] = nullptr;
61 }
62
63 }
64
~CodecHalHevcMbencG12()65 CodecHalHevcMbencG12::~CodecHalHevcMbencG12() {
66 CODECHAL_ENCODE_FUNCTION_ENTER;
67
68 if (m_wpState)
69 {
70 MOS_Delete(m_wpState);
71 m_wpState = nullptr;
72 }
73
74 if (m_intraDistKernel)
75 {
76 MOS_Delete(m_intraDistKernel);
77 m_intraDistKernel = nullptr;
78 }
79
80 if (m_hmeKernel)
81 {
82 MOS_Delete(m_hmeKernel);
83 m_hmeKernel = nullptr;
84 }
85
86 if (m_swScoreboardState)
87 {
88 MOS_Delete(m_swScoreboardState);
89 m_swScoreboardState = nullptr;
90 }
91
92 DestroyMDFResources();
93 }
94
AllocateEncResources()95 MOS_STATUS CodecHalHevcMbencG12::AllocateEncResources()
96 {
97 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
98
99 CODECHAL_ENCODE_FUNCTION_ENTER;
100
101 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG12::AllocateEncResources());
102
103 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
104
105 if (m_hmeSupported)
106 {
107 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources());
108 }
109 // Intermediate CU Record Surface
110 if (!m_intermediateCuRecordLcu32)
111 {
112 //MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE
113 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
114 m_widthAlignedLcu32,
115 m_heightAlignedLcu32 >> 1,
116 Format_A8,
117 m_intermediateCuRecordLcu32));
118 }
119
120 // Scratch Surface
121 if (!m_scratchSurf)
122 {
123 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
124 m_widthAlignedLcu32 >> 3,
125 m_heightAlignedLcu32 >> 5,
126 Format_A8,
127 m_scratchSurf));
128 }
129
130 // Enc constant table for B
131 if (!m_constTableB)
132 {
133 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
134 m_encConstantDataLutSize,
135 m_constTableB));
136 }
137
138 // Load Balance surface size
139 if (!m_loadBalance)
140 {
141 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
142 m_threadMapSize,
143 m_loadBalance));
144 }
145
146 //Debug surface
147 if (!m_dbgSurface)
148 {
149 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
150 m_debugSurfaceSize,
151 m_dbgSurface));
152 }
153
154 return MOS_STATUS_SUCCESS;
155 }
156
157
AllocateMeResources()158 MOS_STATUS CodecHalHevcMbencG12::AllocateMeResources()
159 {
160 CODECHAL_ENCODE_FUNCTION_ENTER;
161 if (m_hmeSupported)
162 {
163 // BRC Distortion Surface
164 if (!m_brcBuffers.meBrcDistortionSurface)
165 {
166 uint32_t width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
167 uint32_t height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
168
169 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
170 width,
171 height,
172 Format_A8,
173 m_brcBuffers.meBrcDistortionSurface));
174 }
175
176 // MV and Distortion Summation Surface
177 if (!m_brcBuffers.mvAndDistortionSumSurface)
178 {
179 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
180 m_mvdistSummationSurfSize,
181 m_brcBuffers.mvAndDistortionSumSurface));
182 CmEvent *event = nullptr;
183 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_brcBuffers.mvAndDistortionSumSurface->InitSurface(0, event));
184 }
185 }
186 return MOS_STATUS_SUCCESS;
187 }
188
AllocateBrcResources()189 MOS_STATUS CodecHalHevcMbencG12::AllocateBrcResources()
190 {
191 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::AllocateBrcResources());
192
193 // BRC Intra Distortion Surface
194 uint32_t width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64);
195 uint32_t height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1;
196
197 if (!m_brcBuffers.brcIntraDistortionSurface)
198 {
199 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
200 width,
201 height,
202 Format_A8,
203 m_brcBuffers.brcIntraDistortionSurface));
204 }
205
206 return MOS_STATUS_SUCCESS;
207 }
208
FreeBrcResources()209 MOS_STATUS CodecHalHevcMbencG12::FreeBrcResources()
210 {
211 CODECHAL_ENCODE_FUNCTION_ENTER;
212
213 CodechalEncHevcState::FreeBrcResources();
214
215 if (m_brcBuffers.brcIntraDistortionSurface)
216 {
217 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_brcBuffers.brcIntraDistortionSurface))
218 }
219
220 return MOS_STATUS_SUCCESS;
221 }
222
FreeMeResources()223 MOS_STATUS CodecHalHevcMbencG12::FreeMeResources()
224 {
225 CODECHAL_ENCODE_FUNCTION_ENTER;
226
227 if (m_brcBuffers.meBrcDistortionSurface)
228 {
229 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_brcBuffers.meBrcDistortionSurface))
230 }
231
232 if (m_brcBuffers.mvAndDistortionSumSurface)
233 {
234 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_brcBuffers.mvAndDistortionSumSurface));
235 }
236
237 return MOS_STATUS_SUCCESS;
238 }
239
FreeEncResources()240 MOS_STATUS CodecHalHevcMbencG12::FreeEncResources()
241 {
242 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
243
244 CODECHAL_ENCODE_FUNCTION_ENTER;
245
246 CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources());
247
248 if (m_intermediateCuRecordLcu32)
249 {
250 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_intermediateCuRecordLcu32));
251 m_intermediateCuRecordLcu32 = nullptr;
252 }
253 if (m_scratchSurf)
254 {
255 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_scratchSurf));
256 m_scratchSurf = nullptr;
257 }
258 if (m_cu16X16QpIn)
259 {
260 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_cu16X16QpIn));
261 m_cu16X16QpIn = nullptr;
262 }
263 if (m_constTableB)
264 {
265 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_constTableB));
266 m_constTableB = nullptr;
267 }
268 if (m_cuSplitSurf)
269 {
270 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_cuSplitSurf));
271 m_cuSplitSurf = nullptr;
272 }
273 if (m_loadBalance)
274 {
275 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_loadBalance));
276 m_loadBalance = nullptr;
277 }
278 if (m_dbgSurface)
279 {
280 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_dbgSurface));
281 m_dbgSurface = nullptr;
282 }
283
284 if (m_lcuLevelData)
285 {
286 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_lcuLevelData));
287 m_lcuLevelData = nullptr;
288 }
289 if (m_reconWithBoundaryPix)
290 {
291 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_reconWithBoundaryPix));
292 m_reconWithBoundaryPix = nullptr;
293 }
294
295 //container surfaces
296 if (m_curSurf)
297 {
298 m_curSurf->NotifyUmdResourceChanged(nullptr);
299 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_curSurf));
300 m_curSurf = nullptr;
301 }
302 if (m_mbCodeBuffer)
303 {
304 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_mbCodeBuffer));
305 m_mbCodeBuffer = nullptr;
306 }
307 if (m_swScoreboardSurf)
308 {
309 m_swScoreboardSurf->NotifyUmdResourceChanged(nullptr);
310 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_swScoreboardSurf));
311 m_swScoreboardSurf = nullptr;
312 }
313 if (m_curSurf2X)
314 {
315 m_curSurf2X->NotifyUmdResourceChanged(nullptr);
316 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_curSurf2X));
317 m_curSurf2X = nullptr;
318 }
319 if (m_histInBuffer)
320 {
321 m_histInBuffer->NotifyUmdResourceChanged(nullptr);
322 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_histInBuffer));
323 m_histInBuffer = nullptr;
324 }
325 if (m_histOutBuffer)
326 {
327 m_histOutBuffer->NotifyUmdResourceChanged(nullptr);
328 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_histOutBuffer));
329 m_histOutBuffer = nullptr;
330 }
331 for (int32_t idx = 0; idx < MAX_VME_FWD_REF + MAX_VME_BWD_REF; idx++)
332 {
333 if (m_surfRefArray[idx])
334 {
335 m_surfRefArray[idx]->NotifyUmdResourceChanged(nullptr);
336 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_surfRefArray[idx]));
337 m_surfRefArray[idx] = nullptr;
338 }
339 if (m_surf2XArray[idx])
340 {
341 m_surf2XArray[idx]->NotifyUmdResourceChanged(nullptr);
342 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_surf2XArray[idx]));
343 m_surf2XArray[idx] = nullptr;
344 }
345 }
346
347 //Free MDF objects
348 if (m_cmKrnB)
349 {
350 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyKernel(m_cmKrnB));
351 m_cmKrnB = nullptr;
352 }
353 if (m_cmKrnB64)
354 {
355 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyKernel(m_cmKrnB64));
356 m_cmKrnB64 = nullptr;
357 }
358 if (m_cmProgramB)
359 {
360 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyProgram(m_cmProgramB));
361 m_cmProgramB = nullptr;
362 }
363 if (m_cmProgramB64)
364 {
365 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyProgram(m_cmProgramB64));
366 m_cmProgramB64 = nullptr;
367 }
368 if (m_hevcBrcG12)
369 {
370 MOS_Delete(m_hevcBrcG12);
371 }
372
373 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG12::FreeEncResources());
374
375 if (m_threadSpace)
376 {
377 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyThreadSpace(m_threadSpace));
378 m_threadSpace = nullptr;
379 }
380
381 return eStatus;
382 }
383
AllocateMDFResources()384 MOS_STATUS CodecHalHevcMbencG12::AllocateMDFResources()
385 {
386 uint32_t devOp = CM_DEVICE_CREATE_OPTION_SCRATCH_SPACE_DISABLE | CM_DEVICE_CONFIG_FAST_PATH_ENABLE;
387
388 if (!m_mfeEnabled)
389 {
390 //create CM device
391 if (!m_cmDev)
392 {
393 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
394 m_osInterface->pfnNotifyStreamIndexSharing(m_osInterface);
395 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateCmDevice(
396 m_osInterface->pOsContext,
397 m_cmDev,
398 devOp,
399 CM_DEVICE_CREATE_PRIORITY_DEFAULT));
400 }
401
402 if (!m_surfIndexArray)
403 {
404 m_surfIndexArray = (MBencSurfaceIndex *) new (std::nothrow) (SurfaceIndex [m_maxMfeSurfaces][m_maxMultiFrames]);
405 CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfIndexArray);
406 }
407 }
408 else
409 {
410 //create CM device
411 if (!m_cmDev)
412 {
413 if (!m_mfeEncodeSharedState->pCmDev)
414 {
415 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface);
416 m_osInterface->pfnNotifyStreamIndexSharing(m_osInterface);
417 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnCreateCmDevice(
418 m_osInterface->pOsContext,
419 m_cmDev,
420 devOp,
421 CM_DEVICE_CREATE_PRIORITY_DEFAULT));
422
423 m_mfeEncodeSharedState->pCmDev = m_cmDev;
424 }
425 else
426 {
427 m_cmDev = m_mfeEncodeSharedState->pCmDev;
428 }
429 }
430
431 if (!m_mfeEncodeSharedState->commonSurface)
432 {
433 m_surfIndexArray = (MBencSurfaceIndex *) new (std::nothrow) ( SurfaceIndex [m_maxMfeSurfaces][m_maxMultiFrames]);
434 CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfIndexArray);
435 m_mfeEncodeSharedState->commonSurface = reinterpret_cast<SurfaceIndex *>(m_surfIndexArray);
436 }
437 else
438 {
439 m_surfIndexArray = reinterpret_cast<MBencSurfaceIndex *>(m_mfeEncodeSharedState->commonSurface);
440 }
441
442 if (!m_mfeEncodeSharedState->maxThreadWidthFrames)
443 {
444 m_mfeEncodeSharedState->maxThreadWidthFrames = MOS_NewArray(uint32_t, m_maxMultiFrames);
445 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mfeEncodeSharedState->maxThreadWidthFrames);
446 }
447 }
448
449 //create CM Queue
450 if (!m_cmQueue)
451 {
452 CM_QUEUE_CREATE_OPTION queueCreateOption = CM_DEFAULT_QUEUE_CREATE_OPTION;
453 if (m_computeContextEnabled)
454 {
455 queueCreateOption.QueueType = CM_QUEUE_TYPE_COMPUTE;
456 }
457 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateQueueEx(m_cmQueue, queueCreateOption));
458 }
459
460 //create CM task
461 if (!m_cmTask)
462 {
463 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateTask(m_cmTask));
464 }
465
466 return MOS_STATUS_SUCCESS;
467 }
468
DestroyMDFResources()469 MOS_STATUS CodecHalHevcMbencG12::DestroyMDFResources()
470 {
471 if (m_cmDev && m_cmTask)
472 {
473 m_cmDev->DestroyTask(m_cmTask);
474 m_cmTask = nullptr;
475 }
476
477 if (!m_mfeEnabled)
478 {
479 delete[] m_surfIndexArray;
480 m_surfIndexArray = nullptr;
481 if (m_osInterface != nullptr)
482 {
483 m_osInterface->pfnDestroyCmDevice(m_cmDev);
484 m_cmDev = nullptr;
485 }
486 }
487 else
488 {
489 if (m_mfeLastStream)
490 {
491 MOS_DeleteArray(m_mfeEncodeSharedState->maxThreadWidthFrames);
492 m_mfeEncodeSharedState->maxThreadWidthFrames = nullptr;
493
494 delete[] m_surfIndexArray;
495 m_surfIndexArray = nullptr;
496 m_mfeEncodeSharedState->commonSurface = nullptr;
497 if (m_osInterface != nullptr)
498 {
499 m_osInterface->pfnDestroyCmDevice(m_cmDev);
500 m_mfeEncodeSharedState->pCmDev = m_cmDev = nullptr;
501 }
502 }
503 else
504 {
505 m_surfIndexArray = nullptr;
506 m_cmDev = nullptr;
507 }
508 }
509
510 return MOS_STATUS_SUCCESS;
511 }
512
InitKernelState()513 MOS_STATUS CodecHalHevcMbencG12::InitKernelState()
514 {
515 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
516
517 CODECHAL_ENCODE_FUNCTION_ENTER;
518
519 m_colorBitMfeEnabled = m_mfeEnabled ? true : false;
520
521 // Create weighted prediction kernel state
522 m_wpState = MOS_New(CodechalEncodeWPMdfG12, this);
523 CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState);
524 CODECHAL_ENCODE_CHK_STATUS_RETURN(((CodechalEncodeWPMdfG12 *)m_wpState)->InitKernelStateIsa((void *)GEN12LP_WEIGHTEDPREDICTION_GENX, GEN12LP_WEIGHTEDPREDICTION_GENX_SIZE));
525
526 // create intra distortion kernel
527 m_intraDistKernel = MOS_New(CodechalKernelIntraDistMdfG12, this);
528 CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel);
529 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->InitializeKernelIsa(
530 (void*)GEN12LP_COARSEINTRA_GENX,
531 GEN12LP_COARSEINTRA_GENX_SIZE));
532
533 // Create SW scoreboard init kernel state
534 CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardMdfG12, this));
535 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState());
536 // Create Hme kernel
537 m_hmeKernel = MOS_New(CodechalKernelHmeMdfG12, this);
538 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel)
539
540 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->LoadProgram((void *)GEN12_HEVC_B_LCU32,
541 GEN12_HEVC_B_LCU32_SIZE,
542 m_cmProgramB,
543 "-nojitter"));
544
545 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateKernel(m_cmProgramB,
546 "Gen12_HEVC_Enc_B",
547 m_cmKrnB));
548
549 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->LoadProgram((void *)GEN12_HEVC_B_LCU64,
550 GEN12_HEVC_B_LCU64_SIZE,
551 m_cmProgramB64,
552 "-nojitter"));
553
554 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateKernel(m_cmProgramB64,
555 "Gen12_HEVC_Enc_LCU64_B",
556 m_cmKrnB64));
557
558 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hevcBrcG12 = MOS_New(CodecHalHevcBrcG12, this));
559 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->InitBrcKernelState());
560 return eStatus;
561 }
562
GetMaxBtCount()563 uint32_t CodecHalHevcMbencG12::GetMaxBtCount()
564 {
565 uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
566 uint32_t btCountPhase2 = btIdxAlignment;
567 return btCountPhase2;
568 }
569
SetupKernelArgsB()570 MOS_STATUS CodecHalHevcMbencG12::SetupKernelArgsB()
571 {
572 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
573
574 //Setup surfaces
575 //Setup first combined 1D surface
576 int idx = 0;
577 SurfaceIndex *surfIndex = nullptr;
578 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_combinedBuffer1->GetIndex(surfIndex));
579
580 CODECHAL_ENCODE_CHK_NULL_RETURN(surfIndex);
581 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
582
583 //Setup second combined 1D surface
584 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_combinedBuffer2->GetIndex(surfIndex));
585 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
586
587 //VME Surface
588 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_curVme;
589
590 //Curr Pic
591 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_curSurf->GetIndex(surfIndex));
592 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
593
594 //Recon surface with populated boundary pixels.
595 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_reconWithBoundaryPix->GetIndex(surfIndex));
596 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
597
598 //Intermediate CU Record Surface for I and B kernel
599 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intermediateCuRecordLcu32->GetIndex(surfIndex));
600 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
601
602 // PAK object command surface
603 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_mbCodeSurfIdx;
604
605 // CU packet for PAK surface
606 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_mvDataSurfIdx;
607
608 //Software Scoreboard surface
609 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardSurf->GetIndex(surfIndex));
610 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
611
612 // CU 16x16 QP data input surface
613 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cu16X16QpIn->GetIndex(surfIndex));
614 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
615
616 // Lcu level data input
617 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_lcuLevelData->GetIndex(surfIndex));
618 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
619
620 //ColocatedCUMVDataSurface
621 if (m_colocCumvData)
622 {
623 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_colocCumvData->GetIndex(surfIndex));
624 }
625 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
626
627 //HMEMotionPredDataSurface
628 if (m_hmeMotionPredData)
629 {
630 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeMotionPredData->GetIndex(surfIndex));
631 }
632 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
633
634 if (m_isMaxLcu64)
635 {
636 if (m_curSurf2X)
637 {
638 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *m_cur2XVme;
639
640 }
641 else
642 {
643 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
644 }
645
646 }
647
648
649 // Kernel debug surface
650 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_dbgSurface->GetIndex(surfIndex));
651 (*m_surfIndexArray)[idx++][m_mfeEncodeParams.streamId] = *surfIndex;
652
653 //Init all the surfaces with dummy value
654 if ((!m_mfeEnabled) || (m_mfeFirstStream))
655 {
656 for (int i = 0; i < m_maxMfeSurfaces; i++)
657 {
658 for (int j = 1; j < m_maxMultiFrames; j++)
659 {
660 (*m_surfIndexArray)[i][j] = (*m_surfIndexArray)[i][0];
661 }
662 }
663 }
664
665 if ((m_mfeLastStream) || (!m_mfeEnabled))
666 {
667 CmKernel *cmKrn = nullptr;
668 if (m_isMaxLcu64)
669 {
670 cmKrn = m_cmKrnB64;
671 }
672 else
673 {
674 cmKrn = m_cmKrnB;
675 }
676
677 //Setup surfaces
678 //Setup first combined 1D surface
679 int idx = 0;
680 int commonIdx = 0;
681 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
682
683 //Setup second combined 1D surface
684 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
685
686 //VME Surface
687 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
688
689 //Curr Pic
690 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
691
692 //Recon surface with populated boundary pixels.
693 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
694
695 //Intermediate CU Record Surface for I and B kernel
696 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
697
698 // PAK object command surface
699 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
700
701 // CU packet for PAK surface
702 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
703
704 //Software Scoreboard surface
705 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
706
707 // CU 16x16 QP data input surface
708 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
709
710 // Lcu level data input
711 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
712
713 //ColocatedCUMVDataSurface
714 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
715
716 //HMEMotionPredDataSurface
717 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
718
719 if (m_isMaxLcu64)
720 {
721 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
722 }
723
724 //Enc const table
725 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_constTableB->GetIndex(surfIndex));
726 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
727
728 //load Balance surface
729 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_loadBalance->GetIndex(surfIndex));
730 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
731
732 //reserved entries
733 if (!m_isMaxLcu64)
734 {
735 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
736 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex), surfIndex));
737 }
738
739 // Kernel debug surface
740 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetKernelArg(idx++, sizeof(SurfaceIndex) * m_maxMultiFrames, &((*m_surfIndexArray)[commonIdx++][0])));
741 }
742
743 return eStatus;
744 }
745
SetColorBitRemap(uint8_t * remapTable,int32_t multiFrameNumber,int32_t curColor,int32_t * totalColor,int32_t * totalFrameAdj)746 void CodecHalHevcMbencG12::SetColorBitRemap(uint8_t * remapTable, int32_t multiFrameNumber, int32_t curColor, int32_t * totalColor, int32_t * totalFrameAdj)
747 {
748 if (multiFrameNumber == 1)
749 {
750 *totalColor = curColor;
751 uint8_t * curColorLOC;
752 for (int32_t i = 0; i < *totalColor; i++)
753 {
754 curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
755
756 curColorLOC[m_frameColorMapLocCurFrame] = static_cast<uint8_t>(multiFrameNumber - 1);
757 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(i);
758 curColorLOC[m_frameColorMapLocTotalFrame] = 0;
759 curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
760 }
761 }
762 else if (multiFrameNumber == 2)
763 {
764 *totalColor = ((curColor + 1) >> 1) << 2;
765 uint8_t * curColorLOC;
766 int32_t n1 = 0;
767 int32_t n2 = 0;
768 for (int32_t i = 0; i < *totalColor; i++)
769 {
770 curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
771 curColorLOC[m_frameColorMapLocCurFrame] = i & 0x1;
772
773 if (curColorLOC[m_frameColorMapLocCurFrame] == 0)
774 {
775 if (n1 >= curColor)
776 curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
777 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(n1);
778 n1++;
779 }
780 else if (curColorLOC[m_frameColorMapLocCurFrame] == 1)
781 {
782 if (n2 >= curColor)
783 curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
784 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(n2);
785 n2++;
786 }
787 curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
788 if ((n1 >= curColor) && (n2 >= curColor))
789 {
790 *totalColor = i + 1;
791 break;
792 }
793 }
794 for (int32_t i = 0; i < *totalColor; i++)
795 {
796 curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
797 curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
798 if (curColorLOC[m_frameColorMapLocCurFrame] != m_frameColorMapFrameInvalid)
799 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(totalFrameAdj[curColorLOC[m_frameColorMapLocCurFrame]]);
800 else
801 curColorLOC[m_frameColorMapLocTotalFrame] = 0;
802 }
803 }
804 else if (multiFrameNumber == 3)
805 {
806 *totalColor = curColor << 2;
807 uint8_t * curColorLOC;
808 int32_t n1 = 0;
809 int32_t n2 = 0;
810 for (int32_t i = 0; i < *totalColor; i++)
811 {
812 curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
813 if ((i & 3) < 2)
814 {
815 curColorLOC[m_frameColorMapLocCurFrame] = i & 0x3;
816 if (n1 >= curColor)
817 curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
818 curColorLOC[m_frameColorMapLocCurColor] = i >> 2;
819 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(multiFrameNumber);
820 if ((i & 3) == 1)
821 n1++;
822 }
823 else
824 {
825 curColorLOC[m_frameColorMapLocCurFrame] = 2;
826 if (n2 >= curColor)
827 curColorLOC[m_frameColorMapLocCurFrame] = m_frameColorMapFrameInvalid;
828 curColorLOC[m_frameColorMapLocCurColor] = static_cast<uint8_t>(n2);
829 n2++;
830 }
831
832 if ((n1 >= curColor) && (n2 >= curColor))
833 {
834 *totalColor = i + 1;
835 break;
836 }
837 }
838 for (int32_t i = 0; i < *totalColor; i++)
839 {
840 curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
841 curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
842 if (curColorLOC[m_frameColorMapLocCurFrame] != m_frameColorMapFrameInvalid)
843 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(totalFrameAdj[curColorLOC[m_frameColorMapLocCurFrame]]);
844 else
845 curColorLOC[m_frameColorMapLocTotalFrame] = 0;
846 }
847 }
848 else if (multiFrameNumber == 4)
849 {
850 *totalColor = curColor << 2;
851 uint8_t * curColorLOC;
852 for (int32_t i = 0; i < *totalColor; i++)
853 {
854 curColorLOC = (uint8_t *)(remapTable + i * m_frameColorMapEntrySize);
855
856 curColorLOC[m_frameColorMapLocCurFrame] = i & 0x3;
857 curColorLOC[m_frameColorMapLocCurColor] = i >> 2;
858 curColorLOC[m_frameColorMapLocTotalFrame] = static_cast<uint8_t>(totalFrameAdj[curColorLOC[m_frameColorMapLocCurFrame]]);
859 curColorLOC[m_frameColorMapLocTotalColor] = static_cast<uint8_t>(*totalColor);
860 }
861 }
862 else
863 {
864 CODECHAL_ENCODE_ASSERTMESSAGE(" Error: MultiFrameNumber , not supported!");
865 }
866
867 return;
868 }
869
EncodeMbEncKernel(CODECHAL_MEDIA_STATE_TYPE encFunctionType)870 MOS_STATUS CodecHalHevcMbencG12::EncodeMbEncKernel(
871 CODECHAL_MEDIA_STATE_TYPE encFunctionType)
872 {
873 CODECHAL_ENCODE_FUNCTION_ENTER;
874 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
875
876 uint32_t walkerResolutionX, walkerResolutionY, maxthreadWidth, maxthreadHeight;
877
878 PerfTagSetting perfTag;
879 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
880
881 CmKernel *cmKrn = nullptr;
882 if (m_isMaxLcu64)
883 {
884 cmKrn = m_cmKrnB64;
885 if (m_hevcSeqParams->TargetUsage == 1)
886 {
887 walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
888 walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
889 }
890 else
891 {
892 walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6);
893 walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6);
894 }
895 }
896 else
897 {
898 cmKrn = m_cmKrnB;
899 walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
900 walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
901 }
902
903 if (m_numberConcurrentGroup > 1)
904 {
905 if (m_degree45Needed)
906 {
907 maxthreadWidth = walkerResolutionX;
908 maxthreadHeight = walkerResolutionX + (walkerResolutionX + walkerResolutionY + m_numberConcurrentGroup - 2) / m_numberConcurrentGroup;
909 }
910 else //for tu4 we ensure threadspace width and height is even or a multiple of 4
911 {
912 maxthreadWidth = (walkerResolutionX + 1) & 0xfffe; //ensuring width is even
913 maxthreadHeight = ((walkerResolutionX + 1) >> 1) + (walkerResolutionX + 2 * (((walkerResolutionY + 3) & 0xfffc) - 1) + (2 * m_numberConcurrentGroup - 1)) / (2 * m_numberConcurrentGroup);
914 }
915
916 maxthreadHeight *= m_numberEncKernelSubThread;
917 maxthreadHeight += 1;
918 }
919 else
920 {
921 maxthreadWidth = walkerResolutionX;
922 maxthreadHeight = walkerResolutionY;
923 maxthreadHeight *= m_numberEncKernelSubThread;
924 }
925
926 // Generate Lcu Level Data
927 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx]));
928
929 // Generate Concurrent Thread Group Data
930 uint32_t curIdx = m_currRecycledBufIdx;
931 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData(m_encBCombinedBuffer1[curIdx].sResource));
932
933 if (m_mfeEnabled)
934 {
935 if (m_mfeEncodeSharedState->maxTheadWidth < maxthreadWidth)
936 {
937 m_mfeEncodeSharedState->maxTheadWidth = maxthreadWidth;
938 }
939 if (m_mfeEncodeSharedState->maxTheadHeight < maxthreadHeight)
940 {
941 m_mfeEncodeSharedState->maxTheadHeight = maxthreadHeight;
942 }
943
944 m_mfeEncodeSharedState->maxThreadWidthFrames[m_mfeEncodeParams.submitIndex] = maxthreadWidth;
945 m_mfeLastStream = (m_mfeEncodeParams.submitIndex == m_mfeEncodeParams.submitNumber - 1);
946 m_mfeFirstStream = (m_mfeEncodeParams.submitIndex == 0);
947
948 if (m_mfeLastStream)
949 {
950 for (uint32_t i = 0; i < m_mfeEncodeParams.submitNumber; i++)
951 {
952 m_totalFrameAdj[i] = m_mfeEncodeSharedState->maxTheadWidth - m_mfeEncodeSharedState->maxThreadWidthFrames[i];
953 }
954 }
955 }
956
957 int32_t totalColor = m_numberConcurrentGroup;
958 if ((!m_mfeEnabled) || (m_mfeLastStream))
959 {
960 SetColorBitRemap(m_FrameBalance, m_mfeEncodeParams.submitNumber, m_numberConcurrentGroup, &totalColor, m_totalFrameAdj);
961 }
962
963 m_mbCodeIdxForTempMVP = 0xFF;
964 if (m_pictureCodingType == I_TYPE || m_hevcSeqParams->sps_temporal_mvp_enable_flag == false)
965 {
966 // No temoporal MVP in the I frame
967 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
968 }
969 else
970 {
971 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF &&
972 m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
973 {
974 uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
975 // ref must be valid, ref list has max 127 entries
976 if (frameIdx < 0x7F && m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].PicEntry != 0xFF)
977 {
978 m_mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
979 }
980 }
981 if (m_mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
982 {
983 // Temporal reference MV index is invalid and so disable the temporal MVP
984 CODECHAL_ENCODE_ASSERT(false);
985 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
986 }
987 }
988
989 if (m_mfeEnabled && m_mfeLastStream)
990 {
991 //update the TS variables before submitting the kernels
992 maxthreadWidth = m_mfeEncodeSharedState->maxTheadWidth;
993 maxthreadHeight = m_mfeEncodeSharedState->maxTheadHeight;
994 }
995
996 if ((!m_mfeEnabled) || (m_mfeLastStream))
997 {
998 uint32_t threadCount = maxthreadWidth * maxthreadHeight * m_numberConcurrentGroup;
999 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->SetThreadCount(threadCount));
1000 }
1001
1002 // setup curbe, setup surfaces and send all kernel args
1003 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitCurbeDataB());
1004 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupSurfacesB());
1005 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupKernelArgsB());
1006
1007 if (m_mfeEnabled && (!m_mfeLastStream))
1008 {
1009 //Only last stream need to submit the kernels.
1010 return eStatus;
1011 }
1012
1013 if (m_threadSpace != nullptr && m_resolutionChanged)
1014 {
1015 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyThreadSpace(m_threadSpace));
1016 m_threadSpace = nullptr;
1017 }
1018
1019 if (m_threadSpace == nullptr)
1020 {
1021 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateThreadSpace(
1022 maxthreadWidth,
1023 maxthreadHeight,
1024 m_threadSpace));
1025
1026 m_threadSpace->SetThreadSpaceColorCount(totalColor);
1027 }
1028
1029 switch (m_swScoreboardState->GetDependencyPattern())
1030 {
1031 case dependencyWavefront26Degree:
1032 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26));
1033 break;
1034 case dependencyWavefront26ZDegree:
1035 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26ZIG));
1036 break;
1037 case dependencyWavefront26DDegree:
1038 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26D));
1039 break;
1040 case dependencyWavefront26XDDegree:
1041 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26XD));
1042 break;
1043 case dependencyWavefront45XDDegree:
1044 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT45XD_2));
1045 break;
1046 case dependencyWavefront45DDegree:
1047 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT45D));
1048 break;
1049 case dependencyWavefront45Degree:
1050 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT));
1051 break;
1052 case dependencyWavefront26XDegree:
1053 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26X));
1054 break;
1055 case dependencyWavefront26XDegreeAlt:
1056 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_threadSpace->SelectMediaWalkingPattern(CM_WALK_WAVEFRONT26XALT));
1057 break;
1058 default:
1059 CODECHAL_ENCODE_ASSERTMESSAGE("Walking pattern is not supported right now");
1060 eStatus = MOS_STATUS_INVALID_PARAMETER;
1061 return eStatus;
1062 }
1063
1064 CODECHAL_ENCODE_CHK_STATUS_RETURN(cmKrn->AssociateThreadSpace(m_threadSpace));
1065
1066 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmTask->AddKernel(cmKrn));
1067
1068 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
1069 {
1070 CmEvent * event = CM_NO_EVENT;
1071 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmQueue->EnqueueFast(m_cmTask, event));
1072
1073 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmTask->Reset());
1074 m_lastTaskInPhase = false;
1075 }
1076 else
1077 {
1078 m_cmTask->AddSync();
1079 }
1080
1081 CODECHAL_DEBUG_TOOL(
1082 CODEC_REF_LIST currRefList = *(m_refList[m_currReconstructedPic.FrameIdx]);
1083 currRefList.RefPic = m_currOriginalPic;
1084
1085 m_debugInterface->m_currPic = m_currOriginalPic;
1086 m_debugInterface->m_bufferDumpFrameNum = m_storeData;
1087 m_debugInterface->m_frameType = m_pictureCodingType;
1088
1089 DumpMbEncPakOutput(&currRefList, m_debugInterface);
1090 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1091 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1092 CodechalDbgAttr::attrOutput,
1093 "HistoryOut",
1094 m_historyOutBufferSize,
1095 m_historyOutBufferOffset,
1096 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1097 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1098 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1099 CodechalDbgAttr::attrOutput,
1100 "CombinedBuffer2",
1101 m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize,
1102 0,
1103 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1104 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1105 m_swScoreboardState->GetCurSwScoreboardSurface(),
1106 CodechalDbgAttr::attrOutput,
1107 "SBoutSurface",
1108 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1109 )
1110
1111 #if 0 // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them.
1112 {
1113 //CODECHAL_DEBUG_TOOL(
1114 currRefList = *(m_refList[m_currReconstructedPic.FrameIdx]);
1115 currRefList.RefPic = m_currOriginalPic;
1116
1117 m_debugInterface->m_currPic = m_currOriginalPic;
1118 m_debugInterface->m_bufferDumpFrameNum = m_storeData;
1119 m_debugInterface->m_frameType = m_pictureCodingType;
1120
1121 DumpMbEncPakOutput(&currRefList, m_debugInterface);
1122
1123 //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeMbEncMbPakOutput(
1124 // m_debugInterface,
1125 // this,
1126 // &currRefList,
1127 // (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1128 // CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1129 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1130 &currRefList.resRefMbCodeBuffer,
1131 CodechalDbgAttr::attrOutput,
1132 "MbCode",
1133 m_picWidthInMb * m_frameFieldHeightInMb * 64,
1134 CodecHal_PictureIsBottomField(currRefList.RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0,
1135 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1136 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1137
1138 if (m_mvDataSize)
1139 {
1140 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1141 &currRefList.resRefMvDataBuffer,
1142 CodechalDbgAttr::attrOutput,
1143 "MbData",
1144 m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4),
1145 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0,
1146 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1147 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1148 }
1149 if (CodecHalIsFeiEncode(m_codecFunction))
1150 {
1151 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1152 &m_resDistortionBuffer,
1153 CodechalDbgAttr::attrOutput,
1154 "DistortionSurf",
1155 m_picWidthInMb * m_frameFieldHeightInMb * 48,
1156 CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0,
1157 (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ?
1158 CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2));
1159 }
1160 }
1161 #endif
1162 return eStatus;
1163 }
1164
1165 // ------------------------------------------------------------------------------
1166 //| Purpose: Setup Curbe for HEVC MbEnc I Kernels
1167 //| Return: N/A
1168 //------------------------------------------------------------------------------
InitCurbeDataB()1169 MOS_STATUS CodecHalHevcMbencG12::InitCurbeDataB()
1170 {
1171 uint32_t curIdx = m_currRecycledBufIdx;
1172 MOS_LOCK_PARAMS lockFlags;
1173 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1174
1175 uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3; // Map TU 1,4,6 to 0,1,2
1176
1177 // Initialize the CURBE data
1178 MBENC_CURBE curbe;
1179
1180 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
1181 {
1182 curbe.QPType = QP_TYPE_CONSTANT;
1183 curbe.ROIEnable = (m_hevcPicParams->NumROI || m_mbQpDataEnabled) ? true : false;
1184 }
1185 else
1186 {
1187 curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
1188 }
1189
1190 // TU based settings
1191 curbe.EnableCu64Check = m_tuSettings[EnableCu64CheckTuParam][tuMapping];
1192 curbe.MaxNumIMESearchCenter = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];
1193 curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping];
1194 curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping];
1195 curbe.Dynamic64Order = m_tuSettings[Dynamic64OrderTuParam][tuMapping];
1196 curbe.DynamicOrderTh = m_tuSettings[DynamicOrderThTuParam][tuMapping];
1197 curbe.Dynamic64Enable = m_tuSettings[Dynamic64EnableTuParam][tuMapping];
1198 curbe.Dynamic64Th = m_tuSettings[Dynamic64ThTuParam][tuMapping];
1199 curbe.IncreaseExitThresh = m_tuSettings[IncreaseExitThreshTuParam][tuMapping];
1200 curbe.IntraSpotCheck = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping];
1201 curbe.Fake32Enable = m_tuSettings[Fake32EnableTuParam][tuMapping];
1202 curbe.Dynamic64Min32 = m_tuSettings[Dynamic64Min32][tuMapping];
1203
1204 curbe.FrameWidthInSamples = m_frameWidth;
1205 curbe.FrameHeightInSamples = m_frameHeight;
1206
1207 curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
1208 curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
1209 curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
1210 curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
1211
1212 curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc;
1213
1214 curbe.TUDepthControl = curbe.MaxTransformDepthInter;
1215
1216 int32_t sliceQp = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1217 curbe.FrameQP = abs(sliceQp);
1218 curbe.FrameQPSign = (sliceQp > 0) ? 0 : 1;
1219
1220 #if 0 // no need in the optimized kernel because kernel does the table look-up
1221 LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp);
1222 curbe.DW4_ModeIntra32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_32X32];
1223 curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32];
1224
1225 curbe.DW5_ModeIntra16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_16X16];
1226 curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16];
1227 curbe.DW5_ModeIntra8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_8X8];
1228 curbe.DW5_ModeIntraNonDC8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8];
1229
1230 curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED];
1231
1232 curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA];
1233
1234 curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM];
1235
1236 curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0];
1237 curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1];
1238
1239 curbe.DW14_IntraTU4x4CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4];
1240 curbe.DW14_IntraTU8x8CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8];
1241 curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16];
1242 curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32];
1243 curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD;
1244 curbe.DW17_IntraNonDC8x8Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8];
1245 curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32];
1246 #endif
1247
1248 curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1;
1249 curbe.NumofRowTile = m_hevcPicParams->num_tile_rows_minus1 + 1;
1250
1251 curbe.HMEFlag = m_hmeSupported ? 3 : 0;
1252
1253 curbe.MaxRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
1254 curbe.MaxRefIdxL1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1;
1255 curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1;
1256
1257 // Check whether Last Frame is I frame or not
1258 if (m_frameNum && m_lastPictureCodingType == I_TYPE)
1259 {
1260 // This is the flag to notify kernel not to use the history buffer
1261 curbe.LastFrameIsIntra = true;
1262 }
1263 else
1264 {
1265 curbe.LastFrameIsIntra = false;
1266 }
1267
1268 curbe.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
1269 curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
1270 curbe.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag;
1271 curbe.theSameRefList = m_sameRefList;
1272 curbe.IsLowDelay = m_lowDelay;
1273 curbe.NumRefIdxL0 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
1274 curbe.NumRefIdxL1 = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? 0 : (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1);
1275 if (m_hevcSeqParams->TargetUsage == 1)
1276 {
1277 // MaxNumMergeCand C Model uses 4 for TU1,
1278 // for quality consideration, make sure not larger than the value from App as it will be used in PAK
1279 curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4);
1280 }
1281 else
1282 {
1283 // MaxNumMergeCand C Model uses 2 for TU4,7
1284 // for quality consideration, make sure not larger than the value from App as it will be used in PAK
1285 curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2);
1286 }
1287
1288 int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = { 0 }, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = { 0 };
1289 curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
1290 curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
1291 curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
1292 curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);
1293
1294 curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
1295 curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
1296 curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
1297 curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE) ? ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]) : ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);
1298
1299 curbe.RefFrameWinHeight = m_frameHeight;
1300 curbe.RefFrameWinWidth = m_frameWidth;
1301
1302 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetRoundingIntraInterToUse());
1303
1304 curbe.RoundingInter = (m_roundingInterInUse + 1) << 4; // Should be an input from par in the cmodel (slice state)
1305 curbe.RoundingIntra = (m_roundingIntraInUse + 1) << 4; // Should be an input from par in the cmodel (slice state)
1306 curbe.RDEQuantRoundValue = (m_roundingInterInUse + 1) << 4;
1307
1308 uint32_t gopB = m_hevcSeqParams->GopRefDist;
1309
1310 curbe.CostScalingForRA = (m_hevcSeqParams->LowDelayMode) ? 0 : 1;
1311
1312 // get the min distance between current pic and ref pics
1313 uint32_t minPocDist = 255;
1314 uint32_t costTableIndex = 0;
1315
1316 if (curbe.SliceType == CODECHAL_ENCODE_HEVC_B_SLICE)
1317 {
1318 if (curbe.CostScalingForRA == 1)
1319 {
1320 for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++)
1321 {
1322 if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist)
1323 minPocDist = abs(tbRefListL0[ref]);
1324 }
1325 for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++)
1326 {
1327 if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist)
1328 minPocDist = abs(tbRefListL1[ref]);
1329 }
1330
1331 if (gopB == 4)
1332 {
1333 costTableIndex = minPocDist;
1334 if (minPocDist == 4)
1335 costTableIndex -= 1;
1336 }
1337 if (gopB == 8)
1338 {
1339 costTableIndex = minPocDist + 3;
1340 if (minPocDist == 4)
1341 costTableIndex -= 1;
1342 if (minPocDist == 8)
1343 costTableIndex -= 4;
1344 }
1345 }
1346 }
1347 else if (curbe.SliceType == CODECHAL_ENCODE_HEVC_P_SLICE)
1348 {
1349 costTableIndex = 8;
1350 }
1351 else
1352 {
1353 costTableIndex = 9;
1354 }
1355
1356 curbe.CostTableIndex = costTableIndex;
1357
1358 // the following fields are needed by the new optimized kernel in v052417
1359 curbe.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
1360 curbe.MaxIntraRdeIter = 1;
1361 curbe.CornerNeighborPixel = 0;
1362 curbe.IntraNeighborAvailFlags = 0;
1363 curbe.SubPelMode = 3; // qual-pel search
1364 curbe.InterSADMeasure = 2; // Haar transform
1365 curbe.IntraSADMeasure = 2; // Haar transform
1366 curbe.IntraPrediction = 0; // enable 32x32, 16x16, and 8x8 luma intra prediction
1367 curbe.RefIDCostMode = 1; // 0: AVC and 1: linear method
1368 curbe.TUBasedCostSetting = 0;
1369 curbe.ConcurrentGroupNum = m_numberConcurrentGroup;
1370 curbe.WaveFrontSplitVQFix = ((1 << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)) == 64) ? 1 : 0;
1371 curbe.NumofUnitInWaveFront = m_numWavefrontInOneRegion;
1372 curbe.LoadBalenceEnable = 0; // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe
1373 curbe.ThreadNumber = MOS_MIN(2, m_numberEncKernelSubThread);
1374 curbe.Pic_init_qp_B = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1375 curbe.Pic_init_qp_P = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1376 curbe.Pic_init_qp_I = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
1377 curbe.SuperHME = m_16xMeSupported;
1378 curbe.UltraHME = m_32xMeSupported;
1379 curbe.EnableCu64Check = (m_hevcSeqParams->TargetUsage == 1);
1380 curbe.PerBFrameQPOffset = 0;
1381
1382 switch (m_hevcSeqParams->TargetUsage)
1383 {
1384 case 1:
1385 curbe.Degree45 = 0;
1386 curbe.Break12Dependency = 0;
1387 curbe.DisableTemporal16and8 = 0;
1388 break;
1389 case 4:
1390 curbe.Degree45 = 1;
1391 curbe.Break12Dependency = 1;
1392 curbe.DisableTemporal16and8 = 0;
1393 break;
1394 default:
1395 curbe.Degree45 = 1;
1396 curbe.Break12Dependency = 1;
1397 curbe.DisableTemporal16and8 = 1;
1398 break;
1399 }
1400
1401 curbe.WaveFrontSplitsEnable = curbe.Degree45; // when 45 degree, enable wave front split
1402 curbe.LongTermReferenceFlags_L0 = 0;
1403 for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++)
1404 {
1405 curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
1406 }
1407 curbe.LongTermReferenceFlags_L1 = 0;
1408 for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++)
1409 {
1410 curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i;
1411 }
1412
1413 curbe.Stepping = 0;
1414 curbe.Cu64SkipCheckOnly = 0;
1415 curbe.Cu642Nx2NCheckOnly = 0;
1416 curbe.EnableCu64AmpCheck = 1;
1417 curbe.IntraSpeedMode = 0; // 35 mode
1418 curbe.DisableIntraNxN = 0;
1419
1420 #if 0 //needed only when using A stepping on simu/emu
1421 if (m_hwInterface->GetPlatform().usRevId == 0)
1422 {
1423 curbe.Stepping = 1;
1424 curbe.TUDepthControl = 1;
1425 curbe.MaxTransformDepthInter = 1;
1426 curbe.MaxTransformDepthIntra = 0;
1427 //buf->curbe.EnableCu64Check = 1;
1428 curbe.Cu64SkipCheckOnly = 0;
1429 curbe.Cu642Nx2NCheckOnly = 1;
1430 curbe.EnableCu64AmpCheck = 0;
1431 curbe.DisableIntraNxN = 1;
1432 curbe.MaxNumMergeCand = 1;
1433 }
1434 #endif
1435
1436 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1437 lockFlags.WriteOnly = 1;
1438 auto buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource(
1439 m_osInterface,
1440 &m_encBCombinedBuffer1[curIdx].sResource,
1441 &lockFlags);
1442 CODECHAL_ENCODE_CHK_NULL_RETURN(buf);
1443
1444 if (curbe.Degree45)
1445 {
1446 MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent));
1447 }
1448 buf->Curbe = curbe;
1449
1450 m_osInterface->pfnUnlockResource(
1451 m_osInterface,
1452 &m_encBCombinedBuffer1[curIdx].sResource);
1453
1454 // clean-up the thread dependency buffer in the second combined buffer
1455 if (m_numberEncKernelSubThread > 1)
1456 {
1457 MOS_LOCK_PARAMS lockFlags;
1458
1459 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1460 lockFlags.WriteOnly = 1;
1461 auto data = (uint8_t*)m_osInterface->pfnLockResource(
1462 m_osInterface,
1463 &m_encBCombinedBuffer2[curIdx].sResource,
1464 &lockFlags);
1465 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1466
1467 MOS_ZeroMemory(&data[m_threadTaskBufferOffset], m_threadTaskBufferSize);
1468
1469 m_osInterface->pfnUnlockResource(
1470 m_osInterface,
1471 &m_encBCombinedBuffer2[curIdx].sResource);
1472 }
1473
1474 if (m_initEncConstTable)
1475 {
1476 // Initialize the Enc Constant Table surface
1477 if (m_isMaxLcu64)
1478 {
1479 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_constTableB->WriteSurface(
1480 (unsigned char *)m_encLcu64ConstantDataLut,
1481 nullptr,
1482 sizeof(m_encLcu64ConstantDataLut)));
1483 }
1484 else
1485 {
1486 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_constTableB->WriteSurface(
1487 (unsigned char *)m_encLcu32ConstantDataLut,
1488 nullptr,
1489 sizeof(m_encLcu32ConstantDataLut)));
1490 }
1491 m_initEncConstTable = false;
1492 }
1493
1494 if (m_resolutionChanged)
1495 {
1496 m_initEncLoadBalence = true;
1497 }
1498
1499 if (m_initEncLoadBalence)
1500 {
1501 // Initialize the Enc Constant Table surface
1502 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_loadBalance->WriteSurface((unsigned char *)m_FrameBalance, nullptr, sizeof(m_FrameBalance)));
1503
1504 m_initEncLoadBalence = false;
1505 }
1506
1507 return eStatus;
1508 }
1509
SetupSurfacesB()1510 MOS_STATUS CodecHalHevcMbencG12::SetupSurfacesB()
1511 {
1512 CODECHAL_ENCODE_FUNCTION_ENTER;
1513 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1514
1515 //Concurrent Thread Group Data
1516 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateBuffer(
1517 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
1518 m_combinedBuffer1));
1519
1520 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateBuffer(
1521 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1522 m_combinedBuffer2));
1523
1524 CODECHAL_DEBUG_TOOL(
1525 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1526 &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource,
1527 CodechalDbgAttr::attrOutput,
1528 "CombinedBuffer1",
1529 m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize,
1530 0,
1531 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1532 )
1533
1534 CODECHAL_DEBUG_TOOL(
1535 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1536 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1537 CodechalDbgAttr::attrOutput,
1538 "HistoryIn",
1539 sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer),
1540 sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer),
1541 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1542 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1543 &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource,
1544 CodechalDbgAttr::attrOutput,
1545 "ThreadTask",
1546 m_threadTaskBufferSize,
1547 m_threadTaskBufferOffset,
1548 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));)
1549
1550 PMOS_SURFACE inputSurface = m_rawSurfaceToEnc;
1551
1552 // Cur and VME surfaces
1553 //Source Y and UV
1554 //first create the 2D cur input surface
1555 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1556 &inputSurface->OsResource,
1557 m_curSurf));
1558
1559 CODECHAL_DEBUG_TOOL(
1560 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1561 inputSurface,
1562 CodechalDbgAttr::attrEncodeRawInputSurface,
1563 "MbEnc_Input_SrcSurf")));
1564
1565 if (m_curVme)
1566 {
1567 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyVmeSurfaceG7_5(m_curVme));
1568 m_curVme = nullptr;
1569 }
1570
1571 for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
1572 {
1573 int32_t ll = 0;
1574 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1575 if (!CodecHal_PictureIsInvalid(refPic) &&
1576 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1577 {
1578 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1579
1580 PMOS_SURFACE refSurfacePtr = nullptr;
1581 if (surface_idx == 0 && m_useWeightedSurfaceForL0)
1582 {
1583 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx);
1584 }
1585 else
1586 {
1587 refSurfacePtr = &m_refList[idx]->sRefBuffer;
1588 }
1589
1590 // Picture Y VME
1591 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1592 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1593 &refSurfacePtr->OsResource,
1594 m_surfRefArray[surface_idx]));
1595
1596 CODECHAL_DEBUG_TOOL(
1597 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1598 std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1599 "_L0" + std::to_string(surface_idx);
1600 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1601 refSurfacePtr,
1602 CodechalDbgAttr::attrReferenceSurfaces,
1603 refSurfName.data())));
1604 }
1605 else
1606 {
1607 // Providing Dummy surface as per VME requirement.
1608 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1609 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1610 &inputSurface->OsResource,
1611 m_surfRefArray[surface_idx]));
1612 }
1613
1614 ll = 1;
1615 refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1616 if (!CodecHal_PictureIsInvalid(refPic) &&
1617 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1618 {
1619 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1620
1621 PMOS_SURFACE refSurfacePtr = nullptr;
1622 if (surface_idx == 0 && m_useWeightedSurfaceForL1)
1623 {
1624 refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx);
1625 }
1626 else
1627 {
1628 refSurfacePtr = &m_refList[idx]->sRefBuffer;
1629 }
1630
1631 // Picture Y VME
1632 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1633 &refSurfacePtr->OsResource,
1634 m_surfRefArray[MAX_VME_BWD_REF + surface_idx]));
1635
1636 CODECHAL_DEBUG_TOOL(
1637 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1638 std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1639 "_L1" + std::to_string(surface_idx);
1640 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1641 refSurfacePtr,
1642 CodechalDbgAttr::attrEncodeRawInputSurface,
1643 refSurfName.data())));
1644 }
1645 else
1646 {
1647 // Providing Dummy surface as per VME requirement.
1648 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1649 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1650 &inputSurface->OsResource,
1651 m_surfRefArray[MAX_VME_BWD_REF + surface_idx]));
1652 }
1653 }
1654
1655 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateVmeSurfaceG7_5(
1656 m_curSurf,
1657 &m_surfRefArray[0],
1658 &m_surfRefArray[MAX_VME_BWD_REF],
1659 MAX_VME_FWD_REF,
1660 MAX_VME_BWD_REF,
1661 m_curVme));
1662
1663 /* WA for 16k resolution tests with P010 format. Recon surface is NV12 format with width=2*original_width
1664 32k width is not supported by MEDIA_SURFACE_STATE_CMD.
1665 We can therefore change the recon dimensions to 16k width and 32k pitch,
1666 this will cover the portion of the surface that VME uses */
1667 if (MEDIA_IS_WA(m_waTable, Wa16kWidth32kPitchNV12ReconForP010Input) && m_curVme && m_encode16KSequence && (uint8_t)HCP_CHROMA_FORMAT_YUV420 == m_chromaFormat && inputSurface->Format == Format_P010)
1668 {
1669 CM_VME_SURFACE_STATE_PARAM vmeDimensionParam;
1670 vmeDimensionParam.width = ENCODE_HEVC_16K_PIC_WIDTH;
1671 vmeDimensionParam.height = ENCODE_HEVC_16K_PIC_HEIGHT;
1672 m_cmDev->SetVmeSurfaceStateParam(m_curVme, &vmeDimensionParam);
1673 }
1674
1675 // Current Y with reconstructed boundary pixels
1676 if (!m_reconWithBoundaryPix)
1677 {
1678 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
1679 &m_currPicWithReconBoundaryPix.OsResource,
1680 m_reconWithBoundaryPix));
1681 }
1682
1683 // PAK object command surface
1684 if (m_mbCodeBuffer)
1685 {
1686 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_mbCodeBuffer));
1687 m_mbCodeBuffer = nullptr;
1688 }
1689
1690 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
1691 &m_resMbCodeSurface,
1692 m_mbCodeBuffer));
1693
1694 // PAK object command surface
1695 CM_BUFFER_STATE_PARAM bufParams;
1696 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBufferAlias(
1697 m_mbCodeBuffer,
1698 m_mbCodeSurfIdx));
1699 bufParams.uiBaseAddressOffset = 0;
1700 bufParams.uiSize = m_mvOffset;
1701 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mbCodeBuffer->SetSurfaceStateParam(
1702 m_mbCodeSurfIdx,
1703 &bufParams));
1704
1705 // CU packet for PAK surface
1706 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBufferAlias(
1707 m_mbCodeBuffer,
1708 m_mvDataSurfIdx));
1709 bufParams.uiBaseAddressOffset = m_mvOffset;
1710 bufParams.uiSize = m_mbCodeSize - m_mvOffset;
1711 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mbCodeBuffer->SetSurfaceStateParam(
1712 m_mvDataSurfIdx,
1713 &bufParams));
1714
1715 //Software Scoreboard surface
1716 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1717 &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource,
1718 m_swScoreboardSurf));
1719
1720 CODECHAL_DEBUG_TOOL(
1721 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1722 m_swScoreboardState->GetCurSwScoreboardSurface(),
1723 CodechalDbgAttr::attrOutput,
1724 "SBinSurface",
1725 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));)
1726
1727 if ((!m_mbQpDataEnabled) || (m_brcEnabled))
1728 {
1729 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1730 &m_brcBuffers.sBrcMbQpBuffer.OsResource,
1731 m_cu16X16QpIn));
1732 }
1733 else
1734 {
1735 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1736 &m_mbQpDataSurface.OsResource,
1737 m_cu16X16QpIn));
1738 }
1739
1740 // Lcu level data input
1741 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1742 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx].OsResource,
1743 m_lcuLevelData));
1744
1745 CODECHAL_DEBUG_TOOL(
1746 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1747 &m_lcuLevelInputDataSurface[m_currRecycledBufIdx],
1748 CodechalDbgAttr::attrOutput,
1749 "LcuInfoSurface",
1750 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));)
1751
1752 // Colocated CU Motion Vector Data Surface
1753 if (m_colocCumvData)
1754 {
1755 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroySurface(m_colocCumvData));
1756 m_colocCumvData = nullptr;
1757 }
1758
1759 if (m_mbCodeIdxForTempMVP != 0xFF)
1760 {
1761 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
1762 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateBuffer(
1763 m_trackedBuf->GetMvTemporalBuffer(m_mbCodeIdxForTempMVP),
1764 m_colocCumvData));
1765
1766 CODECHAL_DEBUG_TOOL(
1767 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1768 m_trackedBuf->GetMvTemporalBuffer(m_mbCodeIdxForTempMVP),
1769 CodechalDbgAttr::attrOutput,
1770 "CollocatedMV",
1771 m_sizeOfMvTemporalBuffer,
1772 0,
1773 CODECHAL_MEDIA_STATE_HEVC_B_MBENC)););
1774 }
1775
1776 // HME motion predictor data
1777 if (m_hmeEnabled)
1778 {
1779 m_hmeMotionPredData = m_hmeKernel->GetCmSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer);
1780 }
1781
1782 if (m_isMaxLcu64)
1783 {
1784 PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1785
1786 //VME 2X Inter prediction Surface for current frame
1787 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
1788 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1789 &currScaledSurface2x->OsResource,
1790 m_curSurf2X));
1791
1792 CODECHAL_DEBUG_TOOL(
1793 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1794 currScaledSurface2x,
1795 CodechalDbgAttr::attrReferenceSurfaces,
1796 "2xScaledSurf"))
1797 );
1798
1799 if (m_cur2XVme)
1800 {
1801 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->DestroyVmeSurfaceG7_5(m_cur2XVme));
1802 m_cur2XVme = nullptr;
1803 }
1804
1805 // RefFrame's 2x DS surface
1806 for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++)
1807 {
1808 int32_t ll = 0;
1809 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1810 if (!CodecHal_PictureIsInvalid(refPic) &&
1811 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1812 {
1813 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1814
1815 // Picture Y VME
1816 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1817
1818 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1819 &m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx)->OsResource,
1820 m_surf2XArray[surface_idx]));
1821
1822 CODECHAL_DEBUG_TOOL(
1823 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1824 std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1825 "_L0" + std::to_string(surface_idx);
1826 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1827 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
1828 CodechalDbgAttr::attrReferenceSurfaces,
1829 refSurfName.data())));
1830 }
1831 else
1832 {
1833 // Providing Dummy surface as per VME requirement.
1834 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1835 &currScaledSurface2x->OsResource,
1836 m_surf2XArray[surface_idx]));
1837 }
1838
1839 ll = 1;
1840 refPic = m_hevcSliceParams->RefPicList[ll][surface_idx];
1841 if (!CodecHal_PictureIsInvalid(refPic) &&
1842 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
1843 {
1844 int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
1845
1846 // Picture Y VME
1847 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
1848 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1849 &m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx)->OsResource,
1850 m_surf2XArray[MAX_VME_BWD_REF + surface_idx]));
1851
1852 CODECHAL_DEBUG_TOOL(
1853 m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
1854 std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)) +
1855 "_L1" + std::to_string(surface_idx);
1856 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
1857 m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx),
1858 CodechalDbgAttr::attrReferenceSurfaces,
1859 refSurfName.data())));
1860 }
1861 else
1862 {
1863 // Providing Dummy surface as per VME requirement.
1864 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->UpdateSurface2D(
1865 &currScaledSurface2x->OsResource,
1866 m_surf2XArray[MAX_VME_BWD_REF + surface_idx]));
1867 }
1868 }
1869
1870 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateVmeSurfaceG7_5(
1871 m_curSurf2X,
1872 &m_surf2XArray[0],
1873 &m_surf2XArray[MAX_VME_BWD_REF],
1874 MAX_VME_FWD_REF,
1875 MAX_VME_BWD_REF,
1876 m_cur2XVme));
1877 }
1878
1879 if (m_isMaxLcu64)
1880 {
1881 // Encoder History Input Buffer
1882 if (!m_histInBuffer)
1883 {
1884 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
1885 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
1886 &m_encoderHistoryInputBuffer.OsResource,
1887 m_histInBuffer));
1888 }
1889
1890 // Encoder History Input Buffer
1891 if (!m_histOutBuffer)
1892 {
1893 //m_hwInterface->CacheabilitySettings[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
1894 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cmDev->CreateSurface2D(
1895 &m_encoderHistoryOutputBuffer.OsResource,
1896 m_histOutBuffer));
1897 }
1898 }
1899
1900 return eStatus;
1901 }
1902
EncodeIntraDistKernel()1903 MOS_STATUS CodecHalHevcMbencG12::EncodeIntraDistKernel()
1904 {
1905 CodechalKernelIntraDistMdfG12::CurbeParam curbeParam;
1906 curbeParam.downScaledWidthInMb4x = m_downscaledWidthInMb4x;
1907 curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x;
1908
1909 CodechalKernelIntraDistMdfG12::SurfaceParams surfaceParam;
1910 surfaceParam.input4xDsSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
1911 surfaceParam.intraDistSurface = m_brcBuffers.brcIntraDistortionSurface;
1912 surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset;
1913 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam));
1914
1915 return MOS_STATUS_SUCCESS;
1916 }
1917
// TODO: remove this function once the fix in CodechalEncHevcState::GetRoundingIntraInterToUse() is checked in.
GetRoundingIntraInterToUse()1919 MOS_STATUS CodecHalHevcMbencG12::GetRoundingIntraInterToUse()
1920 {
1921 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1922
1923 CODECHAL_ENCODE_FUNCTION_ENTER;
1924
1925 if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingIntra)
1926 {
1927 m_roundingIntraInUse = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetIntra;
1928 }
1929 else
1930 {
1931 if (m_hevcPicParams->CodingType == I_TYPE)
1932 {
1933 m_roundingIntraInUse = 10;
1934 }
1935 else if (m_HierchGopBRCEnabled)
1936 {
1937 //Hierachical B GOP
1938 if (m_hevcPicParams->CodingType == P_TYPE)
1939 {
1940 m_roundingIntraInUse = 4;
1941 }
1942 else if (m_hevcPicParams->CodingType == B_TYPE)
1943 {
1944 m_roundingIntraInUse = 3;
1945 if (m_lowDelay && !m_hevcSeqParams->LowDelayMode)
1946 {
1947 // RAB test, anchor frame
1948 m_roundingIntraInUse = 4;
1949 }
1950 }
1951 else
1952 {
1953 m_roundingIntraInUse = 2;
1954 }
1955 }
1956 else
1957 {
1958 m_roundingIntraInUse = 10;
1959 }
1960 }
1961
1962 if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingInter)
1963 {
1964 m_roundingInterInUse = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetInter;
1965 }
1966 else
1967 {
1968 if (m_HierchGopBRCEnabled)
1969 {
1970 //Hierachical B GOP
1971 if (m_hevcPicParams->CodingType == I_TYPE ||
1972 m_hevcPicParams->CodingType == P_TYPE)
1973 {
1974 m_roundingInterInUse = 4;
1975 }
1976 else if (m_hevcPicParams->CodingType == B_TYPE)
1977 {
1978 m_roundingInterInUse = 3;
1979 if (m_lowDelay && !m_hevcSeqParams->LowDelayMode)
1980 {
1981 // RAB test, anchor frame
1982 m_roundingInterInUse = 4;
1983 }
1984 }
1985 else
1986 {
1987 m_roundingInterInUse = 2;
1988 }
1989 }
1990 else
1991 {
1992 m_roundingInterInUse = 4;
1993 }
1994 }
1995
1996 CODECHAL_ENCODE_VERBOSEMESSAGE("Rounding intra in use:%d, rounding inter in use:%d.\n", m_roundingIntraInUse, m_roundingInterInUse);
1997
1998 return eStatus;
1999 }
2000