1 /*
2 * Copyright (c) 2017-2023, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_vdenc_avc_g11.cpp
//! \brief    This file implements the C++ class/interface for Gen11 platform's AVC
//!           VDEnc encoding to be used across CODECHAL components.
26 //!
27
28 #include "codechal_vdenc_avc_g11.h"
29 #include "codechal_kernel_header_g11.h"
30 #include "codechal_kernel_hme_g11.h"
31 #include "mhw_vdbox_vdenc_g11_X.h"
32 #include "mhw_vdbox_g11_X.h"
33 #include "hal_oca_interface.h"
34 #include "mos_util_user_interface.h"
35 #if defined(ENABLE_KERNELS)
36 #include "igcodeckrn_g11.h"
37 #endif
38 #if USE_CODECHAL_DEBUG_TOOL
39 #include "codechal_debug_encode_par_g11.h"
40 #include "mhw_vdbox_mfx_hwcmd_g11_X.h"
41 #include "mhw_vdbox_vdenc_hwcmd_g11_X.h"
42 #endif
43
44 struct CodechalVdencAvcStateG11::KernelHeader
45 {
46 int m_kernelCount;
47 // Quality mode for Frame/Field
48 CODECHAL_KERNEL_HEADER m_mbEncQltyI;
49 CODECHAL_KERNEL_HEADER m_mbEncQltyP;
50 CODECHAL_KERNEL_HEADER m_mbEncQltyB;
51 // Normal mode for Frame/Field
52 CODECHAL_KERNEL_HEADER m_mbEncNormI;
53 CODECHAL_KERNEL_HEADER m_mbEncNormP;
54 CODECHAL_KERNEL_HEADER m_mbEncNormB;
55 // Performance modes for Frame/Field
56 CODECHAL_KERNEL_HEADER m_mbEncPerfI;
57 CODECHAL_KERNEL_HEADER m_mbEncPerfP;
58 CODECHAL_KERNEL_HEADER m_mbEncPerfB;
59 // Modes for Frame/Field
60 CODECHAL_KERNEL_HEADER m_mbEncAdvI;
61 CODECHAL_KERNEL_HEADER m_mbEncAdvP;
62 CODECHAL_KERNEL_HEADER m_mbEncAdvB;
63
64 // BRC init frame
65 CODECHAL_KERNEL_HEADER m_initFrameBrc;
66 // Frame BRC update
67 CODECHAL_KERNEL_HEADER m_frameEncUpdate;
68 // BRC Reset frame
69 CODECHAL_KERNEL_HEADER m_brcResetFrame;
70 // BRC I Frame Distortion
71 CODECHAL_KERNEL_HEADER m_brcIFrameDist;
72 // RRCBlockCopy
73 CODECHAL_KERNEL_HEADER m_brcBlockCopy;
74 // MbBRC Update
75 CODECHAL_KERNEL_HEADER m_mbBrcUpdate;
76 // 2x DownScaling
77 //Weighted Prediction Kernel
78 CODECHAL_KERNEL_HEADER m_weightedPrediction;
79 // SW scoreboard initialization kernel
80 CODECHAL_KERNEL_HEADER m_initSWScoreboard;
81
82 };
83
84 struct CodechalVdencAvcStateG11::BrcInitDmem
85 {
86 uint8_t BRCFunc_U8; // 0: Init; 2: Reset
87 uint8_t OpenSourceEnable_U8; // 0: disable opensource, 1: enable opensource
88 uint8_t RVSD[2];
89 uint16_t INIT_BRCFlag_U16; // ICQ or CQP with slice size control: 0x00 CBR: 0x10; VBR: 0x20; VCM: 0x40; LOWDELAY: 0x80.
90 uint16_t Reserved;
91 uint16_t INIT_FrameWidth_U16; // Luma width in bytes
92 uint16_t INIT_FrameHeight_U16; // Luma height in bytes
93 uint32_t INIT_TargetBitrate_U32; // target bitrate, set by application
94 uint32_t INIT_MinRate_U32; // 0
95 uint32_t INIT_MaxRate_U32; // Maximum bit rate in bits per second (bps).
96 uint32_t INIT_BufSize_U32; // buffer size
97 uint32_t INIT_InitBufFull_U32; // initial buffer fullness
98 uint32_t INIT_ProfileLevelMaxFrame_U32; // user defined. refer to AVC BRC for conformance check and correction
99 uint32_t INIT_FrameRateM_U32; // FrameRateM is the number of frames in FrameRateD
100 uint32_t INIT_FrameRateD_U32; // If driver gets this FrameRateD from VUI, it is the num_units_in_tick field (32 bits unsigned integer).
101 uint16_t INIT_GopP_U16; // number of P frames in a GOP
102 uint16_t INIT_GopB_U16; // number of B frames in a GOP
103 uint16_t INIT_MinQP_U16; // 10
104 uint16_t INIT_MaxQP_U16; // 51
105 int8_t INIT_DevThreshPB0_S8[8]; // lowdelay ? (-45, -33, -23, -15, -8, 0, 15, 25) : (-46, -38, -30, -23, 23, 30, 40, 46)
106 int8_t INIT_DevThreshVBR0_S8[8]; // lowdelay ? (-45, -35, -25, -15, -8, 0, 20, 40) : (-46, -40, -32, -23, 56, 64, 83, 93)
107 int8_t INIT_DevThreshI0_S8[8]; // lowdelay ? (-40, -30, -17, -10, -5, 0, 10, 20) : (-43, -36, -25, -18, 18, 28, 38, 46)
108 uint8_t INIT_InitQPIP; // Initial QP for I and P
109
110 uint8_t INIT_NotUseRhoDm_U8; // Reserved
111 uint8_t INIT_InitQPB; // Initial QP for B
112 uint8_t INIT_MbQpCtrl_U8; // Enable MB level QP control (global)
113 uint8_t INIT_SliceSizeCtrlEn_U8; // Enable slice size control
114 int8_t INIT_IntraQPDelta_I8[3]; // set to zero for all by default
115 int8_t INIT_SkipQPDelta_I8; // Reserved
116 int8_t INIT_DistQPDelta_I8[4]; // lowdelay ? (-5, -2, 2, 5) : (0, 0, 0, 0)
117 uint8_t INIT_OscillationQpDelta_U8; // BRCFLAG_ISVCM ? 16 : 0
118 uint8_t INIT_HRDConformanceCheckDisable_U8; // BRCFLAG_ISAVBR ? 1 : 0
119 uint8_t INIT_SkipFrameEnableFlag;
120 uint8_t INIT_TopQPDeltaThrForAdapt2Pass_U8; // =1. QP Delta threshold for second pass.
121 uint8_t INIT_TopFrmSzThrForAdapt2Pass_U8; // lowdelay ? 10 : 50. Top frame size threshold for second pass
122 uint8_t INIT_BotFrmSzThrForAdapt2Pass_U8; // lowdelay ? 10 : 200. Bottom frame size threshold for second pass
123 uint8_t INIT_QPSelectForFirstPass_U8; // lowdelay ? 0 : 1. =0 to use previous frame final QP; or =1 to use (targetQP + previousQP) / 2.
124 uint8_t INIT_MBHeaderCompensation_U8; // Reserved
125 uint8_t INIT_OverShootCarryFlag_U8; // set to zero by default
126 uint8_t INIT_OverShootSkipFramePct_U8; // set to zero by default
127 uint8_t INIT_EstRateThreshP0_U8[7]; // 4, 8, 12, 16, 20, 24, 28
128 uint8_t INIT_EstRateThreshB0_U8[7]; // 4, 8, 12, 16, 20, 24, 28
129 uint8_t INIT_EstRateThreshI0_U8[7]; // 4, 8, 12, 16, 20, 24, 28
130 uint8_t INIT_FracQPEnable_U8; // ExtendedRhoDomainEn from par file
131 uint8_t INIT_ScenarioInfo_U8; // 0: UNKNOWN, 1: DISPLAYREMOTING, 2: VIDEOCONFERENCE, 3: ARCHIVE, 4: LIVESTREAMING.
132 uint8_t INIT_StaticRegionStreamIn_U8; // should be programmed from par file
133 uint8_t INIT_DeltaQP_Adaptation_U8; // =1, should be programmed from par file
134 uint8_t INIT_MaxCRFQualityFactor_U8; // =52, should be programmed from par file
135 uint8_t INIT_CRFQualityFactor_U8; // =25, should be programmed from par file
136 uint8_t INIT_BotQPDeltaThrForAdapt2Pass_U8; // =1. QP Delta threshold for second pass.
137 uint8_t INIT_SlidingWindowSize_U8; // =30, the window size (in frames) used to compute bit rate
138 uint8_t INIT_SlidingWidowRCEnable_U8; // =0, sliding window based rate control (SWRC) disabled, 1: enabled
139 uint8_t INIT_SlidingWindowMaxRateRatio_U8; // =120, ratio between the max rate within the window and average target bitrate
140 uint8_t INIT_LowDelayGoldenFrameBoost_U8; // only for lowdelay mode, 0 (default): no boost for I and scene change frames, 1: boost
141 uint8_t INIT_AdaptiveCostEnable_U8; // 0: disabled, 1: enabled
142 uint8_t INIT_AdaptiveHMEExtensionEnable_U8; // 0: disabled, 1: enabled
143 uint8_t INIT_ICQReEncode_U8; // 0: disabled, 1: enabled
144 uint8_t INIT_LookaheadDepth_U8; // Lookahead depth in unit of frames [0, 127]
145 uint8_t INIT_SinglePassOnly; // 0: disabled, 1: enabled
146 uint8_t INIT_New_DeltaQP_Adaptation_U8; // = 1 to enable new delta QP adaption
147 uint8_t RSVD2[55]; // must be zero
148 };
149
150 struct CodechalVdencAvcStateG11::BrcUpdateDmem
151 {
152 uint8_t BRCFunc_U8; // =1 for Update, other values are reserved for future use
153 uint8_t RSVD[3];
154 uint32_t UPD_TARGETSIZE_U32; // refer to AVC BRC for calculation
155 uint32_t UPD_FRAMENUM_U32; // frame number
156 uint32_t UPD_PeakTxBitsPerFrame_U32; // current global target bits - previous global target bits (global target bits += input bits per frame)
157 uint32_t UPD_FrameBudget_U32; // target time counter
158 uint32_t FrameByteCount; // PAK output via MMIO
159 uint32_t TimingBudgetOverflow; // PAK output via MMIO
160 uint32_t ImgStatusCtrl; // PAK output via MMIO
161 uint32_t IPCMNonConformant; // PAK output via MMIO
162
163 uint16_t UPD_startGAdjFrame_U16[4]; // 10, 50, 100, 150
164 uint16_t UPD_MBBudget_U16[52]; // MB bugdet for QP 0 � 51.
165 uint16_t UPD_SLCSZ_TARGETSLCSZ_U16; // target slice size
166 uint16_t UPD_SLCSZ_UPD_THRDELTAI_U16[42]; // slice size threshold delta for I frame
167 uint16_t UPD_SLCSZ_UPD_THRDELTAP_U16[42]; // slice size threshold delta for P frame
168 uint16_t UPD_NumOfFramesSkipped_U16; // Recording how many frames have been skipped.
169 uint16_t UPD_SkipFrameSize_U16; // Recording the skip frame size for one frame. =NumMBs * 1, assuming one bit per mb for skip frame.
170 uint16_t UPD_StaticRegionPct_U16; // One entry, recording the percentage of static region
171 uint8_t UPD_gRateRatioThreshold_U8[7]; // 80,95,99,101,105,125,160
172 uint8_t UPD_CurrFrameType_U8; // I frame: 2; P frame: 0; B frame: 1.
173 uint8_t UPD_startGAdjMult_U8[5]; // 1, 1, 3, 2, 1
174 uint8_t UPD_startGAdjDiv_U8[5]; // 40, 5, 5, 3, 1
175 uint8_t UPD_gRateRatioThresholdQP_U8[8]; // 253,254,255,0,1,1,2,3
176 uint8_t UPD_PAKPassNum_U8; // current pak pass number
177 uint8_t UPD_MaxNumPass_U8; // 2
178 uint8_t UPD_SceneChgWidth_U8[2]; // set both to MIN((NumP + 1) / 5, 6)
179 uint8_t UPD_SceneChgDetectEn_U8; // Enable scene change detection
180 uint8_t UPD_SceneChgPrevIntraPctThreshold_U8; // =96. scene change previous intra percentage threshold
181 uint8_t UPD_SceneChgCurIntraPctThreshold_U8; // =192. scene change current intra percentage threshold
182 uint8_t UPD_IPAverageCoeff_U8; // lowdelay ? 0 : 128
183 uint8_t UPD_MinQpAdjustment_U8; // Minimum QP increase step
184 uint8_t UPD_TimingBudgetCheck_U8; // Flag indicating if kernel will check timing budget.
185 int8_t reserved_I8[4]; // must be zero
186 uint8_t UPD_CQP_QpValue_U8; // Application specified target QP in BRC_ICQ mode
187 uint8_t UPD_CQP_FracQp_U8; // Application specified fine position in BRC_ICQ mode
188 uint8_t UPD_HMEDetectionEnable_U8; // 0: default, 1: HuC BRC kernel requires information from HME detection kernel output
189 uint8_t UPD_HMECostEnable_U8; // 0: default, 1: driver provides HME cost table
190 uint8_t UPD_DisablePFrame8x8Transform_U8; // 0: enable, 1: disable
191 uint8_t RSVD3; // must be zero
192 uint8_t UPD_ROISource_U8; // =0: disable, 1: ROIMap from HME Static Region or from App dirty rectangle, 2: ROIMap from App
193 uint8_t RSVD4; // must be zero
194 uint16_t UPD_TargetSliceSize_U16; // default: 1498, max target slice size from app DDI
195 uint16_t UPD_MaxNumSliceAllowed_U16; // computed by driver based on level idc
196 uint16_t UPD_SLBB_Size_U16; // second level batch buffer (SLBB) size in bytes, the input buffer will contain two SLBBs A and B, A followed by B, A and B have the same structure.
197 uint16_t UPD_SLBB_B_Offset_U16; // offset in bytes from the beginning of the input buffer, it points to the start of SLBB B, set by driver for skip frame support
198 uint16_t UPD_AvcImgStateOffset_U16; // offset in bytes from the beginning of SLBB A
199 uint16_t reserved_u16;
200 uint32_t NumOfSlice; // PAK output via MMIO
201
202 /* HME distortion based QP adjustment */
203 uint16_t AveHmeDist_U16; // default: 0, in HME detection kernel output
204 uint8_t HmeDistAvailable_U8; // 0: disabled, 1: enabled
205 uint8_t DisableDMA; // default =0, use DMA data transfer; =1, use regular region read/write
206 uint16_t AdditionalFrameSize_U16; // for slice size control improvement
207 uint8_t AddNALHeaderSizeInternally_U8;
208 uint8_t UPD_RoiQpViaForceQp_U8; // HuC does not update StreamIn Buffer, 1: HuC updates StreamIn Buffer
209 uint32_t CABACZeroInsertionSize_U32; // PAK output via MMIO
210 uint32_t MiniFramePaddingSize_U32; // PAK output via MMIO
211 uint16_t UPD_WidthInMB_U16; // width in MB
212 uint16_t UPD_HeightInMB_U16; // height in MB
213 int8_t UPD_ROIQpDelta_I8[8]; // Application specified ROI QP Adjustment for Zone0, Zone1, Zone2 and Zone3, Zone4, Zone5, Zone6 and Zone7.
214
215 //HME--Offset values need to be a multiple of 4 in order to be aligned to the 4x4 HME block for downscaled 4X HME precision and HME--Offset range is [-128,127]
216 int8_t HME0XOffset_I8; // default = 32, Frame level X offset from the co-located (0, 0) location for HME0.
217 int8_t HME0YOffset_I8; // default = 24, Frame level Y offset from the co-located (0, 0) location for HME0.
218 int8_t HME1XOffset_I8; // default = -32, Frame level X offset from the co-located (0, 0) location for HME1.
219 int8_t HME1YOffset_I8; // default = -24, Frame level Y offset from the co-located (0, 0) location for HME1.
220 uint8_t MOTION_ADAPTIVE_G4;
221 uint8_t EnableLookAhead;
222 uint8_t UPD_LA_Data_Offset_U8;
223 uint8_t UPD_CQMEnabled_U8; // 0 indicates CQM is disabled for current frame; otherwise CQM is enabled.
224 uint32_t UPD_LA_TargetSize_U32; // target frame size in lookahead BRC (if EnableLookAhead == 1) or TCBRC mode. If zero, lookahead BRC or TCBRC is disabled.
225 uint32_t UPD_LA_TargetFulness_U32; // target VBV buffer fulness in lookahead BRC mode (if EnableLookAhead == 1).
226 uint8_t UPD_Delta_U8; // delta QP of pyramid
227 uint8_t UPD_ROM_CURRENT_U8; // ROM average of current frame
228 uint8_t UPD_ROM_ZERO_U8; // ROM zero percentage (255 is 100%)
229 uint8_t UPD_TCBRC_SCENARIO_U8;
230 uint8_t RSVD2[12];
231 };
232
233 // CURBE for Static Frame Detection kernel
234 class CodechalVdencAvcStateG11::SfdCurbe
235 {
236 public:
237 union
238 {
239 struct
240 {
241 uint32_t VDEncModeDisable : MOS_BITFIELD_BIT(0);
242 uint32_t BRCModeEnable : MOS_BITFIELD_BIT(1);
243 uint32_t SliceType : MOS_BITFIELD_RANGE(2, 3);
244 uint32_t : MOS_BITFIELD_BIT(4);
245 uint32_t StreamInType : MOS_BITFIELD_RANGE(5, 8);
246 uint32_t EnableAdaptiveMvStreamIn : MOS_BITFIELD_BIT(9);
247 uint32_t : MOS_BITFIELD_BIT(10);
248 uint32_t EnableIntraCostScalingForStaticFrame: MOS_BITFIELD_BIT(11);
249 uint32_t Reserved : MOS_BITFIELD_RANGE(12, 31);
250 };
251 struct
252 {
253 uint32_t Value;
254 };
255 } m_dw0;
256
257 union
258 {
259 struct
260 {
261 uint32_t QPValue : MOS_BITFIELD_RANGE(0, 7);
262 uint32_t NumOfRefs : MOS_BITFIELD_RANGE(8, 15);
263 uint32_t HMEStreamInRefCost : MOS_BITFIELD_RANGE(16, 23);
264 uint32_t Reserved : MOS_BITFIELD_RANGE(24, 31);
265 };
266 struct
267 {
268 uint32_t Value;
269 };
270 } m_dw1;
271
272 union
273 {
274 struct
275 {
276 uint32_t FrameWidthInMBs : MOS_BITFIELD_RANGE(0, 15); // round-up to 4-MB aligned
277 uint32_t FrameHeightInMBs : MOS_BITFIELD_RANGE(16, 31); // round-up to 4-MB aligned
278 };
279 struct
280 {
281 uint32_t Value;
282 };
283 } m_dw2;
284
285 union
286 {
287 struct
288 {
289 uint32_t LargeMvThresh : MOS_BITFIELD_RANGE(0, 31);
290 };
291 struct
292 {
293 uint32_t Value;
294 };
295 } m_dw3;
296
297 union
298 {
299 struct
300 {
301 uint32_t TotalLargeMvThreshold : MOS_BITFIELD_RANGE(0, 31);
302 };
303 struct
304 {
305 uint32_t Value;
306 };
307 } m_dw4;
308
309 union
310 {
311 struct
312 {
313 uint32_t ZMVThreshold : MOS_BITFIELD_RANGE(0, 31);
314 };
315 struct
316 {
317 uint32_t Value;
318 };
319 } m_dw5;
320
321 union
322 {
323 struct
324 {
325 uint32_t TotalZMVThreshold : MOS_BITFIELD_RANGE(0, 31);
326 };
327 struct
328 {
329 uint32_t Value;
330 };
331 } m_dw6;
332
333 union
334 {
335 struct
336 {
337 uint32_t MinDistThreshold : MOS_BITFIELD_RANGE(0, 31);
338 };
339 struct
340 {
341 uint32_t Value;
342 };
343 } m_dw7;
344
345 uint8_t m_costTable[52];
346
347 union
348 {
349 struct
350 {
351 uint32_t ActualWidthInMB : MOS_BITFIELD_RANGE(0, 15);
352 uint32_t ActualHeightInMB : MOS_BITFIELD_RANGE(16, 31);
353 };
354 struct
355 {
356 uint32_t Value;
357 };
358 } m_dw21;
359
360 union
361 {
362 struct
363 {
364 uint32_t Reserved : MOS_BITFIELD_RANGE(0, 31);
365 };
366 struct
367 {
368 uint32_t Value;
369 };
370 } m_dw22;
371
372 union
373 {
374 struct
375 {
376 uint32_t Reserved : MOS_BITFIELD_RANGE(0, 31);
377 };
378 struct
379 {
380 uint32_t Value;
381 };
382 } m_dw23;
383
384 union
385 {
386 struct
387 {
388 uint32_t VDEncInputImagStateIndex : MOS_BITFIELD_RANGE(0, 31); // used in VDEnc CQP mode
389 };
390 struct
391 {
392 uint32_t Value;
393 };
394 } m_dw24;
395
396 union
397 {
398 struct
399 {
400 uint32_t Reserved : MOS_BITFIELD_RANGE(0, 31);
401 };
402 struct
403 {
404 uint32_t Value;
405 };
406 } m_dw25;
407
408 union
409 {
410 struct
411 {
412 uint32_t MVDataSurfaceIndex : MOS_BITFIELD_RANGE(0, 31); // contains HME MV Data generated by HME kernel
413 };
414 struct
415 {
416 uint32_t Value;
417 };
418 } m_dw26;
419
420 union
421 {
422 struct
423 {
424 uint32_t InterDistortionSurfaceIndex : MOS_BITFIELD_RANGE(0, 31); // contains HME Inter Distortion generated by HME kernel
425 };
426 struct
427 {
428 uint32_t Value;
429 };
430 } m_dw27;
431
432 union
433 {
434 struct
435 {
436 uint32_t OutputDataSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
437 };
438 struct
439 {
440 uint32_t Value;
441 };
442 } m_dw28;
443
444 union
445 {
446 struct
447 {
448 uint32_t VDEncOutputImagStateIndex : MOS_BITFIELD_RANGE(0, 31);
449 };
450 struct
451 {
452 uint32_t Value;
453 };
454 } m_dw29;
455
SfdCurbe()456 SfdCurbe()
457 {
458 m_dw0.Value = 0;
459 m_dw1.Value = 0;
460 m_dw2.Value = 0;
461 m_dw3.Value = 0;
462 m_dw4.Value = 0;
463 m_dw5.Value = 0;
464 m_dw6.Value = 0;
465 m_dw7.Value = 0;
466 m_dw21.Value = 0;
467 m_dw22.Value = 0;
468 m_dw23.Value = 0;
469 m_dw24.Value = 0;
470 m_dw25.Value = 0;
471 m_dw26.Value = 0;
472 m_dw27.Value = 0;
473 m_dw28.Value = 0;
474 m_dw29.Value = 0;
475
476 for (uint8_t i = 0; i < 52; i++)
477 {
478 m_costTable[i] = 0;
479 }
480 };
481 };
482
//!
//! \brief    Surface slots for the Static Frame Detection kernel, numbered
//!           consecutively from 0; sfdNumSurfaces is the slot count.
//!
enum SfdBindingTableOffset
{
    sfdVdencInputImageState = 0,  // 0
    sfdMvDataSurface,             // 1
    sfdInterDistortionSurface,    // 2
    sfdOutputDataSurface,         // 3
    sfdVdencOutputImageState,     // 4
    sfdNumSurfaces                // 5
};
492
493 const uint32_t CodechalVdencAvcStateG11::m_mvCostSkipBiasQPel[3][8] =
494 {
495 // for normal case
496 { 0, 6, 6, 9, 10, 13, 14, 16 },
497 // for QP = 47,48,49
498 { 0, 6, 6, 6, 6, 7, 8, 8 },
499 // for QP = 50,51
500 { 0, 6, 6, 6, 6, 7, 7, 7 }
501 };
502
503 const uint32_t CodechalVdencAvcStateG11::m_hmeCostDisplayRemote[8][CODEC_AVC_NUM_QP] =
504 {
505 //mv=0
506 {
507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //QP=[39 ~51]
511 },
512 //mv<=16
513 {
514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //QP=[39 ~51]
518 },
519 //mv<=32
520 {
521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[0 ~12]
522 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[13 ~25]
523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[26 ~38]
524 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 //QP=[39 ~51]
525 },
526 //mv<=64
527 {
528 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
529 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13 ~25]
530 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26 ~38]
531 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 //QP=[39 ~51]
532 },
533 //mv<=128
534 {
535 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
536 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
537 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
538 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 //QP=[39 ~51]
539 },
540 //mv<=256
541 {
542 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
543 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
544 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
545 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 //QP=[39 ~51]
546 },
547 //mv<=512
548 {
549 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
550 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
551 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
552 20, 20, 20, 20, 20, 30, 30, 30, 30, 30, 30, 30, 30 //QP=[39 ~51]
553 },
554 //mv<=1024
555 {
556 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
557 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
558 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
559 20, 20, 20, 30, 40, 50, 50, 50, 50, 50, 50, 50, 50 //QP=[39 ~51]
560 }
561 };
562
563 const uint32_t CodechalVdencAvcStateG11::m_hmeCost[8][CODEC_AVC_NUM_QP] =
564 {
565 //mv=0
566 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //QP=[39 ~51]
570 },
571 //mv<=16
572 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //QP=[39 ~51]
576 },
577 //mv<=32
578 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[0 ~12]
579 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[13 ~25]
580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[26 ~38]
581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 //QP=[39 ~51]
582 },
583 //mv<=64
584 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
585 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13 ~25]
586 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26 ~38]
587 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 //QP=[39 ~51]
588 },
589 //mv<=128
590 { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
591 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
592 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
593 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 //QP=[39 ~51]
594 },
595 //mv<=256
596 { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
597 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
598 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
599 10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50 //QP=[39 ~51]
600 },
601 //mv<=512
602 { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
603 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
604 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
605 20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100 //QP=[39 ~51]
606 },
607 //mv<=1024
608 { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
609 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
610 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
611 20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200 //QP=[39 ~51]
612 }
613 };
614
615 const int8_t CodechalVdencAvcStateG11::m_brcInitDistQpDeltaI8[4] =
616 {
617 0, 0, 0, 0
618 };
619
620 const int8_t CodechalVdencAvcStateG11::m_brcInitDistQpDeltaI8LowDelay[4] =
621 {
622 -5, -2, 2, 5
623 };
624
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)625 MOS_STATUS CodechalVdencAvcStateG11::GetKernelHeaderAndSize(
626 void *binary,
627 EncOperation operation,
628 uint32_t krnStateIdx,
629 void *krnHeader,
630 uint32_t *krnSize)
631 {
632 CODECHAL_ENCODE_FUNCTION_ENTER;
633
634 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
635
636 CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
637 CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
638 CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
639
640 auto kernelHeaderTable = (KernelHeader *)binary;
641 PCODECHAL_KERNEL_HEADER invalidEntry = &(kernelHeaderTable->m_weightedPrediction) + 1;
642 PCODECHAL_KERNEL_HEADER nextKrnHeader = nullptr;
643 PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
644
645 if (operation == ENC_BRC)
646 {
647 currKrnHeader = &kernelHeaderTable->m_initFrameBrc;
648 }
649 else if (operation == ENC_MBENC)
650 {
651 currKrnHeader = &kernelHeaderTable->m_mbEncQltyI;
652 }
653 else if (operation == ENC_MBENC_ADV)
654 {
655 currKrnHeader = &kernelHeaderTable->m_mbEncAdvI;
656 }
657 else if (operation == ENC_WP)
658 {
659 currKrnHeader = &kernelHeaderTable->m_weightedPrediction;
660 }
661 else
662 {
663 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
664 return MOS_STATUS_INVALID_PARAMETER;
665 }
666
667 currKrnHeader += krnStateIdx;
668 *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
669
670 nextKrnHeader = (currKrnHeader + 1);
671 uint32_t nextKrnOffset = *krnSize;
672 if (nextKrnHeader < invalidEntry)
673 {
674 nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
675 }
676 *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
677
678 return eStatus;
679 }
680
CodechalVdencAvcStateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)681 CodechalVdencAvcStateG11::CodechalVdencAvcStateG11(
682 CodechalHwInterface * hwInterface,
683 CodechalDebugInterface *debugInterface,
684 PCODECHAL_STANDARD_INFO standardInfo) : CodechalVdencAvcState(hwInterface, debugInterface, standardInfo),
685 m_sinlgePipeVeState(nullptr)
686 {
687 CODECHAL_ENCODE_FUNCTION_ENTER;
688
689 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
690
691 #if defined(ENABLE_KERNELS)
692 m_kernelBase = (uint8_t*)IGCODECKRN_G11;
693 #endif
694 m_cmKernelEnable = true;
695 m_mbStatsSupported = true; //Starting from GEN9
696
697 pfnGetKernelHeaderAndSize = CodechalVdencAvcStateG11::GetKernelHeaderAndSize;
698
699 m_vdencBrcInitDmemBufferSize = sizeof(BrcInitDmem);
700 m_vdencBrcUpdateDmemBufferSize = sizeof(BrcUpdateDmem);
701 m_vdencBrcNumOfSliceOffset = CODECHAL_OFFSETOF(BrcUpdateDmem, NumOfSlice);
702
703 // Virtual Engine is enabled in default.
704 Mos_SetVirtualEngineSupported(m_osInterface, true);
705
706 m_vdboxOneDefaultUsed = true;
707 m_nonNativeBrcRoiSupported = true;
708 m_brcAdaptiveRegionBoostSupported = true;
709
710 m_hmeSupported = true;
711 m_16xMeSupported = true;
712 m_32xMeSupported = true;
713
714 m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
715
716 CODECHAL_DEBUG_TOOL(
717 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG11, this));
718 CreateAvcPar();
719 )
720 }
721
~CodechalVdencAvcStateG11()722 CodechalVdencAvcStateG11::~CodechalVdencAvcStateG11()
723 {
724 CODECHAL_ENCODE_FUNCTION_ENTER;
725
726 if (m_sinlgePipeVeState)
727 {
728 MOS_FreeMemAndSetNull(m_sinlgePipeVeState);
729 }
730
731 CODECHAL_DEBUG_TOOL(
732 DestroyAvcPar();
733 MOS_Delete(m_encodeParState);
734 )
735 }
736
InitializeState()737 MOS_STATUS CodechalVdencAvcStateG11::InitializeState()
738 {
739 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
740
741 CODECHAL_ENCODE_FUNCTION_ENTER;
742
743 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencAvcState::InitializeState());
744
745 m_sliceSizeStreamoutSupported = true;
746 m_useHwScoreboard = false;
747 m_useCommonKernel = true;
748
749 if (MOS_VE_SUPPORTED(m_osInterface))
750 {
751 m_sinlgePipeVeState = (PCODECHAL_ENCODE_SINGLEPIPE_VIRTUALENGINE_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SINGLEPIPE_VIRTUALENGINE_STATE));
752 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sinlgePipeVeState);
753 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_InitInterface(m_hwInterface, m_sinlgePipeVeState));
754 }
755
756 return eStatus;
757 }
758
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)759 MOS_STATUS CodechalVdencAvcStateG11::SetAndPopulateVEHintParams(
760 PMOS_COMMAND_BUFFER cmdBuffer)
761 {
762 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
763
764 CODECHAL_ENCODE_FUNCTION_ENTER;
765
766 if (!MOS_VE_SUPPORTED(m_osInterface))
767 {
768 return eStatus;
769 }
770
771 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
772 {
773 MOS_VIRTUALENGINE_SET_PARAMS vesetParams;
774 MOS_ZeroMemory(&vesetParams, sizeof(vesetParams));
775 vesetParams.bNeedSyncWithPrevious = true;
776 vesetParams.bSFCInUse = false;
777 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_SetHintParams(m_sinlgePipeVeState, &vesetParams));
778 }
779 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_PopulateHintParams(m_sinlgePipeVeState, cmdBuffer, true));
780
781 return eStatus;
782 }
783
784
SetGpuCtxCreatOption()785 MOS_STATUS CodechalVdencAvcStateG11::SetGpuCtxCreatOption()
786 {
787 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
788
789 CODECHAL_ENCODE_FUNCTION_ENTER;
790
791 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
792 {
793 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
794 }
795 else
796 {
797 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
798 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
799
800 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_ConstructParmsForGpuCtxCreation(
801 m_sinlgePipeVeState,
802 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
803 }
804
805 return eStatus;
806 }
807
UserFeatureKeyReport()808 MOS_STATUS CodechalVdencAvcStateG11::UserFeatureKeyReport()
809 {
810 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
811
812 CODECHAL_ENCODE_FUNCTION_ENTER;
813
814 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencAvcState::UserFeatureKeyReport());
815
816 #if (_DEBUG || _RELEASE_INTERNAL)
817
818 // VE2.0 Reporting
819 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
820
821 #endif // _DEBUG || _RELEASE_INTERNAL
822 return eStatus;
823 }
824
// Emits the slice-level commands of the current pass (m_currPass) into the
// command buffer obtained from the OS interface and, once the task phase is
// complete, submits that buffer.
//
// In order, the function:
//   1. (Re)allocates and locks the batch buffer for PAK slices when single
//      task phase is supported (also in PAK).
//   2. Programs the MFX protection state when content protection is enabled.
//   3. Sends the state of each slice through SendSlice(); slice headers are
//      packed and command offsets are computed on the first pass only.
//   4. Adds the walker state / pipeline flush / MI_FLUSH commands, depending
//      on m_hwInterface->m_isVdencSuperSliceEnabled.
//   5. Inserts the end-of-sequence / end-of-stream data when requested.
//   6. Adds the status read-back commands, ends the batch buffer and submits
//      the command buffer when this is the last task in the phase (or single
//      task phase is not supported).
//   7. On the last pass, clears m_newPpsHeader / m_newSeqHeader.
//
// Returns eStatus (MOS_STATUS_SUCCESS) when every step succeeded; the
// CODECHAL_ENCODE_CHK_*_RETURN macros leave the function early with the
// failing status otherwise.
MOS_STATUS CodechalVdencAvcStateG11::ExecuteSliceLevel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);

    auto cpInterface = m_hwInterface->GetCpInterface();
    auto avcSlcParams = m_avcSliceParams;
    // Picture and sequence parameters are looked up through the IDs referenced
    // by the first slice.
    auto avcPicParams = m_avcPicParams[avcSlcParams->pic_parameter_set_id];
    auto avcSeqParams = m_avcSeqParams[avcPicParams->seq_parameter_set_id];
    auto slcData = m_slcData;

    // *** Temporarily commented until ULT fully support multislice ROW mode

    // For use with the single task phase implementation
    //if (m_sliceStructCaps != CODECHAL_SLICE_STRUCT_ARBITRARYMBSLICE)
    //{
    //    uint32_t numSlc = (m_frameFieldHeightInMb + m_sliceHeight - 1) / m_sliceHeight;

    //    if (numSlc != m_numSlices)
    //    {
    //        return MOS_STATUS_INVALID_PARAMETER;
    //    }
    //}

    bool useBatchBufferForPakSlices = false;
    if (m_singleTaskPhaseSupported && m_singleTaskPhaseSupportedInPak)
    {
        if (m_currPass == 0)
        {
            // The same buffer is used for all slices for all passes.
            uint32_t batchBufferForPakSlicesSize =
                (m_numPasses + 1) * m_numSlices * m_pakSliceSize;
            // Grow the buffer only when the current one is too small; an
            // existing (non-zero sized) buffer is released first.
            if (batchBufferForPakSlicesSize >
                (uint32_t)m_batchBufferForPakSlices[m_currRecycledBufIdx].iSize)
            {
                if (m_batchBufferForPakSlices[m_currRecycledBufIdx].iSize)
                {
                    CODECHAL_ENCODE_CHK_STATUS_RETURN(ReleaseBatchBufferForPakSlices(m_currRecycledBufIdx));
                }

                CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBatchBufferForPakSlices(
                    m_numSlices,
                    m_numPasses,
                    m_currRecycledBufIdx));
            }
        }
        // NOTE(review): every early return between this lock and the matching
        // Mhw_UnlockBb below leaves the batch buffer locked - confirm whether
        // the error path is expected to recover from that.
        CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_LockBb(
            m_osInterface,
            &m_batchBufferForPakSlices[m_currRecycledBufIdx]));
        useBatchBufferForPakSlices = true;
    }

    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

    if (m_osInterface->osCpInterface->IsCpEnabled())
    {
        MHW_CP_SLICE_INFO_PARAMS sliceInfoParam;
        sliceInfoParam.bLastPass = (m_currPass == m_numPasses) ? true : false;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->SetMfxProtectionState(false, &cmdBuffer, nullptr, &sliceInfoParam));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->UpdateParams(false));
    }

    avcSlcParams = m_avcSliceParams;

    // Parameters shared by the slice header packing of every slice; the
    // per-slice pointer (pAvcSliceParams) is filled inside the loop.
    CODECHAL_ENCODE_AVC_PACK_SLC_HEADER_PARAMS packSlcHeaderParams;
    packSlcHeaderParams.pBsBuffer = &m_bsBuffer;
    packSlcHeaderParams.pPicParams = avcPicParams;
    packSlcHeaderParams.pSeqParams = m_avcSeqParam;
    packSlcHeaderParams.ppRefList = &(m_refList[0]);
    packSlcHeaderParams.CurrPic = m_currOriginalPic;
    packSlcHeaderParams.CurrReconPic = m_currReconstructedPic;
    packSlcHeaderParams.UserFlags = m_userFlags;
    packSlcHeaderParams.NalUnitType = m_nalUnitType;
    packSlcHeaderParams.wPictureCodingType = m_pictureCodingType;
    packSlcHeaderParams.bVdencEnabled = true;

    // Slice state fields that do not change from slice to slice.
    MHW_VDBOX_AVC_SLICE_STATE sliceState;
    MOS_ZeroMemory(&sliceState, sizeof(sliceState));
    sliceState.presDataBuffer = &m_resMbCodeSurface;
    sliceState.pAvcPicIdx = &(m_picIdx[0]);
    sliceState.pEncodeAvcSeqParams = m_avcSeqParam;
    sliceState.pEncodeAvcPicParams = avcPicParams;
    sliceState.pBsBuffer = &m_bsBuffer;
    sliceState.ppNalUnitParams = m_nalUnitParams;
    sliceState.bBrcEnabled = false;
    // Disable Panic mode when min/max QP control is on. kernel may disable it, but disable in driver also.
    sliceState.bRCPanicEnable = m_panicEnable && (!m_minMaxQpControlEnabled);
    sliceState.bAcceleratorHeaderPackingCaps = m_encodeParams.bAcceleratorHeaderPackingCaps;
    sliceState.wFrameFieldHeightInMB = m_frameFieldHeightInMb;

    MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
    for (uint16_t slcCount = 0; slcCount < m_numSlices; slcCount++)
    {
        // Slice headers and command offsets are produced once, on the first
        // pass; later passes reuse the values stored in slcData.
        if (m_currPass == 0)
        {
            packSlcHeaderParams.pAvcSliceParams = &avcSlcParams[slcCount];
            if (m_acceleratorHeaderPackingCaps)
            {
                slcData[slcCount].SliceOffset = m_bsBuffer.SliceOffset;
                CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalAvcEncode_PackSliceHeader(&packSlcHeaderParams));
                slcData[slcCount].BitSize = m_bsBuffer.BitSize;
            }
            if (m_sliceStructCaps != CODECHAL_SLICE_STRUCT_ARBITRARYMBSLICE)
            {
                slcData[slcCount].CmdOffset = slcCount * m_sliceHeight * m_picWidthInMb * 16 * 4;
            }
            else
            {
                slcData[slcCount].CmdOffset = packSlcHeaderParams.pAvcSliceParams->first_mb_in_slice * 16 * 4;
            }
        }

        sliceState.pEncodeAvcSliceParams = &avcSlcParams[slcCount];
        sliceState.dwDataBufferOffset =
            m_slcData[slcCount].CmdOffset + m_mbcodeBottomFieldOffset;
        sliceState.dwOffset = slcData[slcCount].SliceOffset;
        sliceState.dwLength = slcData[slcCount].BitSize;
        sliceState.uiSkipEmulationCheckCount = slcData[slcCount].SkipEmulationByteCount;
        sliceState.dwSliceIndex = (uint32_t)slcCount;
        sliceState.bFirstPass = (m_currPass == 0);
        sliceState.bLastPass = (m_currPass == m_numPasses);
        sliceState.bInsertBeforeSliceHeaders = (slcCount == 0);
        sliceState.bVdencInUse = true;
        // App handles tail insertion for VDEnc dynamic slice in non-cp case
        sliceState.bVdencNoTailInsertion = m_vdencNoTailInsertion;

        uint32_t batchBufferForPakSlicesStartOffset =
            (uint32_t)m_batchBufferForPakSlices[m_currRecycledBufIdx].iCurrent;

        if (useBatchBufferForPakSlices)
        {
            sliceState.pBatchBufferForPakSlices =
                &m_batchBufferForPakSlices[m_currRecycledBufIdx];
            sliceState.bSingleTaskPhaseSupported = true;
            sliceState.dwBatchBufferForPakSlicesStartOffset = batchBufferForPakSlicesStartOffset;
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRounding(m_avcRoundingParams, &sliceState));

        sliceState.oneOnOneMapping = m_oneOnOneMapping;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSlice(&cmdBuffer, &sliceState));

        // Report slice size
        if (m_presMetadataBuffer != nullptr)
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(ReportSliceSizeMetaData(m_presMetadataBuffer, &cmdBuffer, slcCount));
        }

        // Add dumps for 2nd level batch buffer
        // (bVdencInUse is set to true above, so this branch is not taken here.)
        if (sliceState.bSingleTaskPhaseSupported && !sliceState.bVdencInUse)
        {
            CODECHAL_ENCODE_CHK_NULL_RETURN(sliceState.pBatchBufferForPakSlices);

            CODECHAL_DEBUG_TOOL(
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->Dump2ndLvlBatch(
                    sliceState.pBatchBufferForPakSlices,
                    CODECHAL_MEDIA_STATE_ENC_NORMAL,
                    nullptr));
            )
        }

        // For SKL, only the 1st slice state should be programmed for VDENC
        if (!m_hwInterface->m_isVdencSuperSliceEnabled)
        {
            break;
        }
        else // For CNL slice state is programmed per Super slice
        {
            MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
            // MfxPipeDone should be set for all super slices except the last super slice and should not be set for tail insertion.
            vdPipelineFlushParams.Flags.bWaitDoneMFX =
                (slcCount == (m_numSlices)-1) ? ((m_lastPicInStream || m_lastPicInSeq) ? 0 : 1) : 1;
            vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
            vdPipelineFlushParams.Flags.bFlushVDENC = 1;
            vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));

            //Do not send MI_FLUSH for last Super slice now
            if (slcCount != ((m_numSlices)-1))
            {
                // Send MI_FLUSH for every Super slice
                MHW_MI_FLUSH_DW_PARAMS flushDwParams;
                MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
                    &cmdBuffer,
                    &flushDwParams));
            }
        }
    }

    if (useBatchBufferForPakSlices)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
            m_osInterface,
            &m_batchBufferForPakSlices[m_currRecycledBufIdx],
            m_lastTaskInPhase));
    }

    //Send VDENC WALKER cmd per every frame for SKL
    if (!m_hwInterface->m_isVdencSuperSliceEnabled)
    {
        PMHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams = CreateMhwVdboxVdencWalkerStateParams();
        CODECHAL_ENCODE_CHK_NULL_RETURN(vdencWalkerStateParams);
        vdencWalkerStateParams->Mode = CODECHAL_ENCODE_MODE_AVC;
        vdencWalkerStateParams->pAvcSeqParams = avcSeqParams;
        vdencWalkerStateParams->pAvcSlcParams = avcSlcParams;
        // The parameter object is deleted before the status is checked so that
        // it is freed on the failure path as well.
        eStatus = m_vdencInterface->AddVdencWalkerStateCmd(&cmdBuffer, vdencWalkerStateParams);
        MOS_Delete(vdencWalkerStateParams);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);

        MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
        // MFXPipeDone should not be set for tail insertion
        vdPipelineFlushParams.Flags.bWaitDoneMFX =
            (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
        vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
        vdPipelineFlushParams.Flags.bFlushVDENC = 1;
        vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
    }

    // Insert end of sequence/stream if set
    if (m_lastPicInStream || m_lastPicInSeq)
    {
        MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
        MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
        pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
        pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
        pakInsertObjectParams.dwBitSize = 32;  // use dwBitSize for SrcDataEndingBitInclusion
        if (m_lastPicInSeq)
        {
            pakInsertObjectParams.dwLastPicInSeqData = (uint32_t)((1 << 16) | CODECHAL_ENCODE_AVC_NAL_UT_EOSEQ << 24);
        }
        if (m_lastPicInStream)
        {
            pakInsertObjectParams.dwLastPicInStreamData = (uint32_t)((1 << 16) | CODECHAL_ENCODE_AVC_NAL_UT_EOSTREAM << 24);
        }
        pakInsertObjectParams.bHeaderLengthExcludeFrmSize = true;
        // The structure was zeroed above and this flag is not set afterwards,
        // so the assert below is not reached here.
        if (pakInsertObjectParams.bEmulationByteBitsInsert)
        {
            //Does not matter here, but keeping for consistency
            CODECHAL_ENCODE_ASSERTMESSAGE("The emulation prevention bytes are not inserted by the app and are requested to be inserted by HW.");
        }
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mfxInterface->AddMfxPakInsertObject(&cmdBuffer, nullptr, &pakInsertObjectParams));
    }

    if (m_hwInterface->m_isVdencSuperSliceEnabled)
    {
        // Send MI_FLUSH with bVideoPipelineCacheInvalidate set to true for last Super slice
        MHW_MI_FLUSH_DW_PARAMS flushDwParams;
        MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
        flushDwParams.bVideoPipelineCacheInvalidate = true;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
            &cmdBuffer,
            &flushDwParams));
    }

#if defined(ENABLE_KERNELS)
    // On-demand sync for VDEnc StreamIn surface and CSC surface
    if (m_currPass == 0)
    {
        if (m_cscDsState->RequireCsc())
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->WaitCscSurface(m_videoContext, true));
        }

        if (m_16xMeSupported)
        {
            auto syncParams = g_cInitSyncParams;
            syncParams.GpuContext = m_videoContext;
            syncParams.bReadOnly = true;
            syncParams.presSyncResource = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
            m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
        }
    }
#endif

    // Prepare MetaData
    if (m_presMetadataBuffer != nullptr)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(m_presMetadataBuffer, &m_pakSliceSizeStreamoutBuffer, &cmdBuffer));
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadMfcStatus(&cmdBuffer));

    if (m_vdencBrcEnabled)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreNumPasses(
            &(m_encodeStatusBuf),
            m_miInterface,
            &cmdBuffer,
            m_currPass));
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));

    // The batch buffer is closed only by the last task of a phase (or by every
    // task when single task phase is not supported).
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
    }

    // Name used for the command buffer dump below, e.g. "PAK_PASS0".
    std::string pak_pass = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass));
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
            &cmdBuffer,
            CODECHAL_NUM_MEDIA_STATES,
            pak_pass.data()));

        //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgReplaceAllCommands(
        //    m_debugInterface,
        //    &cmdBuffer));
    )

    m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    bool renderingFlags = m_videoContextUsesNullHw;

    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        // NOTE(review): cmdBuffer is still passed to the calls below although
        // it was handed back through pfnReturnCommandBuffer above - confirm
        // that this ordering is intended.
        // Restore TLB allocation
        if (MEDIA_IS_WA(m_waTable, WaTlbAllocationForAvcVdenc))
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(RestoreTLBAllocation(&cmdBuffer, &m_vdencTlbMmioBuffer));
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&cmdBuffer));

        HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, renderingFlags));

        CODECHAL_DEBUG_TOOL(
            if (m_mmcState)
            {
                m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
            }
        )

        if (m_sliceSizeStreamoutSupported)
        {
            CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                &m_pakSliceSizeStreamoutBuffer,
                CodechalDbgAttr::attrOutput,
                "SliceSizeStreamout",
                CODECHAL_ENCODE_SLICESIZE_BUF_SIZE,
                0,
                CODECHAL_NUM_MEDIA_STATES)));
        }

        // On the last pass, signal the sync object shared with the render
        // context when signaling is requested and the object exists.
        if ((m_currPass == m_numPasses) &&
            m_signalEnc &&
            !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
        {
            // Check if the signal obj count exceeds max value
            if (m_semaphoreObjCount == MOS_MIN(m_semaphoreMaxCount, MOS_MAX_OBJECT_SIGNALED))
            {
                auto syncParams = g_cInitSyncParams;
                syncParams.GpuContext = m_renderContext;
                syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;

                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
                m_semaphoreObjCount--;
            }

            // signal semaphore
            auto syncParams = g_cInitSyncParams;
            syncParams.GpuContext = m_videoContext;
            syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
            m_semaphoreObjCount++;
        }
    }

    CODECHAL_DEBUG_TOOL(
        // here add the dump buffer for PAK statistics.
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_pakStatsBufferFull[m_currRecycledBufIdx],
            CodechalDbgAttr::attrPakOutput,
            "MB and FrameLevel PAK staistics vdenc",
            m_vdencBrcPakStatsBufferSize + m_picWidthInMb * m_picHeightInMb * 64,  //size
            0,                                                                     //offset
            CODECHAL_MEDIA_STATE_16X_ME));
    )

    if (m_vdencBrcEnabled)
    {
        CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
        CODECHAL_DEBUG_TOOL(DumpEncodeImgStats(nullptr));
    }

    // Reset parameters for next PAK execution
    if (m_currPass == m_numPasses)
    {
        if (!m_singleTaskPhaseSupported)
        {
            m_osInterface->pfnResetPerfBufferID(m_osInterface);
        }

        m_newPpsHeader = 0;
        m_newSeqHeader = 0;
    }

    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateSliceStateParam(
            m_adaptiveRoundingInterEnable,
            &sliceState));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpFrameParFile());)

    return eStatus;
}
1243
InitKernelStateSFD()1244 MOS_STATUS CodechalVdencAvcStateG11::InitKernelStateSFD()
1245 {
1246 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1247
1248 CODECHAL_ENCODE_FUNCTION_ENTER;
1249
1250 auto renderEngineInterface = m_hwInterface->GetRenderInterface();
1251 auto stateHeapInterface = m_renderEngineInterface->m_stateHeapInterface;
1252 CODECHAL_ENCODE_CHK_NULL_RETURN(stateHeapInterface);
1253
1254 uint8_t* kernelBinary;
1255 uint32_t kernelSize;
1256
1257 MOS_STATUS status = CodecHalGetKernelBinaryAndSize(m_kernelBase, m_kuidCommon, &kernelBinary, &kernelSize);
1258 CODECHAL_ENCODE_CHK_STATUS_RETURN(status);
1259
1260 CODECHAL_KERNEL_HEADER currKrnHeader;
1261 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
1262 kernelBinary,
1263 ENC_SFD,
1264 0,
1265 (void*)&currKrnHeader,
1266 &kernelSize));
1267
1268 auto kernelStatePtr = m_sfdKernelState;
1269 kernelStatePtr->KernelParams.iBTCount = sfdNumSurfaces;
1270 kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
1271 kernelStatePtr->KernelParams.iCurbeLength = sizeof(SfdCurbe);
1272 kernelStatePtr->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
1273 kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
1274 kernelStatePtr->KernelParams.iIdCount = 1;
1275 kernelStatePtr->KernelParams.iInlineDataLength = 0;
1276
1277 kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1278 kernelStatePtr->KernelParams.pBinary = kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1279 kernelStatePtr->KernelParams.iSize = kernelSize;
1280
1281 CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1282 stateHeapInterface,
1283 kernelStatePtr->KernelParams.iBTCount,
1284 &kernelStatePtr->dwSshSize,
1285 &kernelStatePtr->dwBindingTableSize));
1286
1287 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1288
1289 return eStatus;
1290 }
1291
CheckSupportedFormat(PMOS_SURFACE surface)1292 bool CodechalVdencAvcStateG11::CheckSupportedFormat(PMOS_SURFACE surface)
1293 {
1294 CODECHAL_ENCODE_FUNCTION_ENTER;
1295
1296 bool colorFormatSupported = true;
1297 if (IS_Y_MAJOR_TILE_FORMAT(surface->TileType))
1298 {
1299 switch (surface->Format)
1300 {
1301 case Format_NV12:
1302 break;
1303 default:
1304 colorFormatSupported = false;
1305 break;
1306 }
1307 }
1308 else if (surface->TileType == MOS_TILE_LINEAR)
1309 {
1310 switch (surface->Format)
1311 {
1312 case Format_NV12:
1313 case Format_YUY2:
1314 case Format_YUYV:
1315 case Format_YVYU:
1316 case Format_UYVY:
1317 case Format_VYUY:
1318 case Format_AYUV:
1319 case Format_A8R8G8B8:
1320 case Format_A8B8G8R8:
1321 break;
1322 default:
1323 colorFormatSupported = false;
1324 break;
1325 }
1326 }
1327 else
1328 {
1329 colorFormatSupported = false;
1330 }
1331
1332 return colorFormatSupported;
1333 }
1334
GetTrellisQuantization(PCODECHAL_ENCODE_AVC_TQ_INPUT_PARAMS params,PCODECHAL_ENCODE_AVC_TQ_PARAMS trellisQuantParams)1335 MOS_STATUS CodechalVdencAvcStateG11::GetTrellisQuantization(PCODECHAL_ENCODE_AVC_TQ_INPUT_PARAMS params, PCODECHAL_ENCODE_AVC_TQ_PARAMS trellisQuantParams)
1336 {
1337 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1338
1339 CODECHAL_ENCODE_FUNCTION_ENTER;
1340
1341 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1342 CODECHAL_ENCODE_CHK_NULL_RETURN(trellisQuantParams);
1343
1344 trellisQuantParams->dwTqEnabled = TrellisQuantizationEnable[params->ucTargetUsage];
1345 trellisQuantParams->dwTqRounding = trellisQuantParams->dwTqEnabled ? TrellisQuantizationRounding[params->ucTargetUsage] : 0;
1346
1347 return eStatus;
1348 }
1349
AddHucOutputRegistersHandling(MmioRegistersHuc * mmioRegisters,PMOS_COMMAND_BUFFER cmdBuffer,bool addToEncodeStatus)1350 MOS_STATUS CodechalVdencAvcStateG11::AddHucOutputRegistersHandling(
1351 MmioRegistersHuc* mmioRegisters,
1352 PMOS_COMMAND_BUFFER cmdBuffer,
1353 bool addToEncodeStatus)
1354 {
1355 CODECHAL_ENCODE_FUNCTION_ENTER;
1356
1357 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
1358 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1359
1360 return StoreHucErrorStatus(mmioRegisters, cmdBuffer, addToEncodeStatus);
1361 }
1362
SetDmemHuCBrcInitReset()1363 MOS_STATUS CodechalVdencAvcStateG11::SetDmemHuCBrcInitReset()
1364 {
1365 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1366
1367 CODECHAL_ENCODE_FUNCTION_ENTER;
1368
1369 // Setup BRC DMEM
1370 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1371 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1372 lockFlagsWriteOnly.WriteOnly = 1;
1373 auto dmem = (BrcInitDmem *)m_osInterface->pfnLockResource(
1374 m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
1375
1376 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
1377 MOS_ZeroMemory(dmem, sizeof(BrcInitDmem));
1378
1379 SetDmemHuCBrcInitResetImpl<BrcInitDmem>(dmem);
1380
1381 // fractional QP enable for extended rho domain
1382 dmem->INIT_FracQPEnable_U8 = m_lookaheadDepth > 0 ? 0 : (uint8_t)m_vdencInterface->IsRhoDomainStatsEnabled();
1383
1384 dmem->INIT_SinglePassOnly = m_vdencSinglePassEnable;
1385
1386 if (m_avcSeqParam->ScenarioInfo == ESCENARIO_GAMESTREAMING)
1387 {
1388 if (m_avcSeqParam->RateControlMethod == RATECONTROL_VBR)
1389 {
1390 m_avcSeqParam->MaxBitRate = m_avcSeqParam->TargetBitRate;
1391 }
1392
1393 // Disable delta QP adaption for non-VCM/ICQ/LowDelay until we have better algorithm
1394 if ((m_avcSeqParam->RateControlMethod != RATECONTROL_VCM) &&
1395 (m_avcSeqParam->RateControlMethod != RATECONTROL_ICQ) &&
1396 (m_avcSeqParam->FrameSizeTolerance != EFRAMESIZETOL_EXTREMELY_LOW))
1397 {
1398 dmem->INIT_DeltaQP_Adaptation_U8 = 0;
1399 }
1400
1401 dmem->INIT_New_DeltaQP_Adaptation_U8 = 1;
1402 }
1403
1404 if (((m_avcSeqParam->TargetUsage & 0x07) == TARGETUSAGE_BEST_SPEED) &&
1405 (m_avcSeqParam->FrameWidth >= m_singlePassMinFrameWidth) &&
1406 (m_avcSeqParam->FrameHeight >= m_singlePassMinFrameHeight) &&
1407 (m_avcSeqParam->FramesPer100Sec >= m_singlePassMinFramePer100s))
1408 {
1409 dmem->INIT_SinglePassOnly = true;
1410 }
1411
1412 dmem->INIT_LookaheadDepth_U8 = m_lookaheadDepth;
1413
1414 //Override the DistQPDelta.
1415 if (m_mbBrcEnabled)
1416 {
1417 if (m_avcSeqParam->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
1418 {
1419 MOS_SecureMemcpy(dmem->INIT_DistQPDelta_I8, 4 * sizeof(int8_t), (void*)m_brcInitDistQpDeltaI8LowDelay, 4 * sizeof(int8_t));
1420 }
1421 else
1422 {
1423 MOS_SecureMemcpy(dmem->INIT_DistQPDelta_I8, 4 * sizeof(int8_t), (void*)m_brcInitDistQpDeltaI8, 4 * sizeof(int8_t));
1424 }
1425 }
1426
1427 CODECHAL_DEBUG_TOOL(
1428 CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateBrcInitParam(
1429 dmem));
1430 )
1431
1432 m_osInterface->pfnUnlockResource(m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
1433
1434 return eStatus;
1435 }
1436
SetDmemHuCBrcUpdate()1437 MOS_STATUS CodechalVdencAvcStateG11::SetDmemHuCBrcUpdate()
1438 {
1439 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1440
1441 CODECHAL_ENCODE_FUNCTION_ENTER;
1442
1443 // Program update DMEM
1444 MOS_LOCK_PARAMS lockFlags;
1445 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1446 lockFlags.WriteOnly = 1;
1447 auto dmem = (BrcUpdateDmem *)m_osInterface->pfnLockResource(
1448 m_osInterface, &m_resVdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_currPass], &lockFlags);
1449 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
1450 SetDmemHuCBrcUpdateImpl<BrcUpdateDmem>(dmem);
1451
1452 MOS_LOCK_PARAMS lockFlagsReadOnly;
1453 MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
1454 lockFlagsReadOnly.ReadOnly = 1;
1455 auto initDmem = (BrcInitDmem *)m_osInterface->pfnLockResource(
1456 m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsReadOnly);
1457 CODECHAL_ENCODE_CHK_NULL_RETURN(initDmem);
1458
1459 if (initDmem->INIT_AdaptiveHMEExtensionEnable_U8)
1460 {
1461 dmem->HME0XOffset_I8 = 32;
1462 dmem->HME0YOffset_I8 = 24;
1463 dmem->HME1XOffset_I8 = -32;
1464 dmem->HME1YOffset_I8 = -24;
1465 }
1466
1467 m_osInterface->pfnUnlockResource(m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
1468
1469 if (m_16xMeSupported && (m_pictureCodingType == P_TYPE))
1470 {
1471 dmem->HmeDistAvailable_U8 = 1;
1472 }
1473 dmem->UPD_WidthInMB_U16 = m_picWidthInMb;
1474 dmem->UPD_HeightInMB_U16 = m_picHeightInMb;
1475
1476 dmem->MOTION_ADAPTIVE_G4 = (m_avcSeqParam->ScenarioInfo == ESCENARIO_GAMESTREAMING) || ((m_avcPicParam->TargetFrameSize > 0) && (m_lookaheadDepth == 0)); // GS or TCBRC
1477 dmem->UPD_CQMEnabled_U8 = m_avcSeqParam->seq_scaling_matrix_present_flag || m_avcPicParam->pic_scaling_matrix_present_flag;
1478
1479 dmem->UPD_LA_TargetSize_U32 = m_avcPicParam->TargetFrameSize << 3;
1480
1481 if (m_lookaheadDepth > 0)
1482 {
1483 dmem->EnableLookAhead = 1;
1484 dmem->UPD_LA_TargetFulness_U32 = m_targetBufferFulness;
1485 dmem->UPD_Delta_U8 = m_avcPicParam->QpModulationStrength;
1486 }
1487
1488 dmem->UPD_TCBRC_SCENARIO_U8 = m_avcSeqParam->bAutoMaxPBFrameSizeForSceneChange;
1489
1490 CODECHAL_DEBUG_TOOL(
1491 CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateBrcUpdateParam(
1492 dmem));
1493 )
1494
1495 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resVdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_currPass]));
1496
1497 return eStatus;
1498 }
1499
LoadMvCost(uint8_t qp)1500 MOS_STATUS CodechalVdencAvcStateG11::LoadMvCost(uint8_t qp)
1501 {
1502 CODECHAL_ENCODE_FUNCTION_ENTER;
1503
1504 for (uint8_t i=0; i< 8; i++)
1505 {
1506 m_vdEncMvCost[i] = Map44LutValue((uint32_t)(m_mvCostSkipBiasQPel[0][i]), 0x6f);
1507 }
1508
1509 if (!m_vdencBrcEnabled)
1510 {
1511 if (qp == 47 || qp == 48 || qp == 49)
1512 {
1513 for (uint8_t i = 3; i < 8; i++)
1514 {
1515 m_vdEncMvCost[i] = Map44LutValue((uint32_t)(m_mvCostSkipBiasQPel[1][i]), 0x6f);
1516 }
1517 }
1518 if (qp == 50 || qp == 51)
1519 {
1520 for (uint8_t i = 3; i < 8; i++)
1521 {
1522 m_vdEncMvCost[i] = Map44LutValue((uint32_t)(m_mvCostSkipBiasQPel[2][i]), 0x6f);
1523 }
1524 }
1525 }
1526
1527 return MOS_STATUS_SUCCESS;
1528 }
1529
LoadHmeMvCost(uint8_t qp)1530 MOS_STATUS CodechalVdencAvcStateG11::LoadHmeMvCost(uint8_t qp)
1531 {
1532 CODECHAL_ENCODE_FUNCTION_ENTER;
1533
1534 PCODEC_AVC_ENCODE_SEQUENCE_PARAMS avcSeqParams = m_avcSeqParam;
1535 const uint32_t(*vdencHmeCostTable)[CODEC_AVC_NUM_QP];
1536 if (avcSeqParams->ScenarioInfo == ESCENARIO_DISPLAYREMOTING)
1537 {
1538 vdencHmeCostTable = m_hmeCostDisplayRemote;
1539 }
1540 else
1541 {
1542 vdencHmeCostTable = m_hmeCost;
1543 }
1544
1545 for (uint8_t i = 0; i < 8; i++)
1546 {
1547 m_vdEncHmeMvCost[i] = Map44LutValue(*(vdencHmeCostTable[i] + qp), 0x6f);
1548 }
1549
1550 return MOS_STATUS_SUCCESS;
1551 }
1552
LoadHmeMvCostTable(PCODEC_AVC_ENCODE_SEQUENCE_PARAMS seqParams,uint8_t hmeMVCostTable[8][42])1553 MOS_STATUS CodechalVdencAvcStateG11::LoadHmeMvCostTable(PCODEC_AVC_ENCODE_SEQUENCE_PARAMS seqParams, uint8_t hmeMVCostTable[8][42])
1554 {
1555 CODECHAL_ENCODE_FUNCTION_ENTER;
1556
1557 const uint32_t(*vdencHmeCostTable)[CODEC_AVC_NUM_QP];
1558 if ((m_avcSeqParam->ScenarioInfo == ESCENARIO_DISPLAYREMOTING) || (m_avcSeqParam->RateControlMethod == RATECONTROL_QVBR))
1559 {
1560 vdencHmeCostTable = m_hmeCostDisplayRemote;
1561 }
1562 else
1563 {
1564 vdencHmeCostTable = m_hmeCost;
1565 }
1566
1567 for (int i = 0; i < 8; i++)
1568 {
1569 for (int j = 0; j < 42; j++)
1570 {
1571 hmeMVCostTable[i][j] = Map44LutValue(*(vdencHmeCostTable[i] + j + 10), 0x6f);
1572 }
1573 }
1574
1575 return MOS_STATUS_SUCCESS;
1576 }
1577
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer)1578 MOS_STATUS CodechalVdencAvcStateG11::AddVdencWalkerStateCmd(
1579 PMOS_COMMAND_BUFFER cmdBuffer)
1580 {
1581 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1582
1583 CODECHAL_ENCODE_FUNCTION_ENTER;
1584
1585 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11 vdencWalkerStateParams;
1586 auto avcSlcParams = m_avcSliceParams;
1587 auto avcPicParams = m_avcPicParams[avcSlcParams->pic_parameter_set_id];
1588 auto avcSeqParams = m_avcSeqParams[avcPicParams->seq_parameter_set_id];
1589
1590 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_AVC;
1591 vdencWalkerStateParams.pAvcSeqParams = avcSeqParams;
1592 vdencWalkerStateParams.pAvcSlcParams = m_avcSliceParams;
1593 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
1594
1595 return eStatus;
1596 }
1597
CalculateVdencCommandsSize()1598 MOS_STATUS CodechalVdencAvcStateG11::CalculateVdencCommandsSize()
1599 {
1600 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1601
1602 CODECHAL_ENCODE_FUNCTION_ENTER;
1603
1604 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
1605 uint32_t vdencPictureStatesSize, vdencPicturePatchListSize;
1606 uint32_t vdencSliceStatesSize, vdencSlicePatchListSize;
1607 m_hwInterface->GetHxxStateCommandSize(
1608 CODECHAL_ENCODE_MODE_AVC,
1609 (uint32_t*)&vdencPictureStatesSize,
1610 (uint32_t*)&vdencPicturePatchListSize,
1611 &stateCmdSizeParams);
1612
1613 m_pictureStatesSize += vdencPictureStatesSize;
1614 m_picturePatchListSize += vdencPicturePatchListSize;
1615
1616 // Picture Level Commands
1617 m_hwInterface->GetVdencStateCommandsDataSize(
1618 CODECHAL_ENCODE_MODE_AVC,
1619 (uint32_t*)&vdencPictureStatesSize,
1620 (uint32_t*)&vdencPicturePatchListSize);
1621
1622 m_pictureStatesSize += vdencPictureStatesSize;
1623 m_picturePatchListSize += vdencPicturePatchListSize;
1624
1625 // Slice Level Commands
1626 m_hwInterface->GetVdencPrimitiveCommandsDataSize(
1627 CODECHAL_ENCODE_MODE_AVC,
1628 (uint32_t*)&vdencSliceStatesSize,
1629 (uint32_t*)&vdencSlicePatchListSize
1630 );
1631
1632 m_sliceStatesSize += vdencSliceStatesSize;
1633 m_slicePatchListSize += vdencSlicePatchListSize;
1634
1635 return eStatus;
1636 }
1637
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTracking,MHW_MI_MMIOREGISTERS * mmioRegister)1638 MOS_STATUS CodechalVdencAvcStateG11::SendPrologWithFrameTracking(
1639 PMOS_COMMAND_BUFFER cmdBuffer,
1640 bool frameTracking,
1641 MHW_MI_MMIOREGISTERS *mmioRegister)
1642 {
1643 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
1644 {
1645 PMOS_CMD_BUF_ATTRI_VE attriExt =
1646 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
1647 attriExt->bUseVirtualEngineHint = true;
1648 attriExt->VEngineHintParams.NeedSyncWithPrevious = 1;
1649 }
1650
1651 return CodechalVdencAvcState::SendPrologWithFrameTracking(cmdBuffer, frameTracking, mmioRegister);
1652 }
1653
CreateMhwVdboxStateCmdsizeParams()1654 PMHW_VDBOX_STATE_CMDSIZE_PARAMS CodechalVdencAvcStateG11::CreateMhwVdboxStateCmdsizeParams()
1655 {
1656 PMHW_VDBOX_STATE_CMDSIZE_PARAMS cmdSizeParams = MOS_New(MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11);
1657
1658 return cmdSizeParams;
1659 }
1660
CreateMhwVdboxVdencWalkerStateParams()1661 PMHW_VDBOX_VDENC_WALKER_STATE_PARAMS CodechalVdencAvcStateG11::CreateMhwVdboxVdencWalkerStateParams()
1662 {
1663 PMHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams = MOS_New(MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11);
1664
1665 return vdencWalkerStateParams;
1666 }
1667
InitKernelStateMe()1668 MOS_STATUS CodechalVdencAvcStateG11::InitKernelStateMe()
1669 {
1670 m_hmeKernel = MOS_New(CodechalKernelHmeG11, this);
1671 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
1672 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
1673 GetCommonKernelHeaderAndSizeG11,
1674 m_kernelBase,
1675 m_kuidCommon));
1676 return MOS_STATUS_SUCCESS;
1677 }
1678
ExecuteMeKernel()1679 MOS_STATUS CodechalVdencAvcStateG11::ExecuteMeKernel()
1680 {
1681 if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled())
1682 {
1683 CodechalKernelHme::CurbeParam curbeParam = {};
1684 curbeParam.subPelMode = 3;
1685 curbeParam.currOriginalPic = m_avcPicParam->CurrOriginalPic;
1686 curbeParam.qpPrimeY = m_avcPicParam->pic_init_qp_minus26 + 26 + m_avcSliceParams->slice_qp_delta;
1687 curbeParam.targetUsage = m_avcSeqParam->TargetUsage;
1688 curbeParam.maxMvLen = CodecHalAvcEncode_GetMaxMvLen(m_avcSeqParam->Level);
1689 curbeParam.numRefIdxL0Minus1 = m_avcSliceParams->num_ref_idx_l0_active_minus1;
1690 curbeParam.numRefIdxL1Minus1 = m_avcSliceParams->num_ref_idx_l1_active_minus1;
1691
1692 auto slcParams = m_avcSliceParams;
1693 curbeParam.list0RefID0FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_0);
1694 curbeParam.list0RefID1FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_1);
1695 curbeParam.list0RefID2FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_2);
1696 curbeParam.list0RefID3FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_3);
1697 curbeParam.list0RefID4FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_4);
1698 curbeParam.list0RefID5FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_5);
1699 curbeParam.list0RefID6FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_6);
1700 curbeParam.list0RefID7FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_7);
1701 curbeParam.list1RefID0FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_1, CODECHAL_ENCODE_REF_ID_0);
1702 curbeParam.list1RefID1FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_1, CODECHAL_ENCODE_REF_ID_1);
1703
1704 CodechalKernelHme::SurfaceParams surfaceParam = {};
1705 surfaceParam.mbaffEnabled = m_mbaffEnabled;
1706 surfaceParam.numRefIdxL0ActiveMinus1 = m_avcSliceParams->num_ref_idx_l0_active_minus1;
1707 surfaceParam.numRefIdxL1ActiveMinus1 = m_avcSliceParams->num_ref_idx_l1_active_minus1;
1708 surfaceParam.verticalLineStride = m_verticalLineStride;
1709 surfaceParam.verticalLineStrideOffset = m_verticalLineStrideOffset;
1710 surfaceParam.refList = &m_refList[0];
1711 surfaceParam.picIdx = &m_picIdx[0];
1712 surfaceParam.currOriginalPic = &m_currOriginalPic;
1713 surfaceParam.refL0List = &(m_avcSliceParams->RefPicList[LIST_0][0]);
1714 surfaceParam.refL1List = &(m_avcSliceParams->RefPicList[LIST_1][0]);
1715 surfaceParam.vdencStreamInEnabled = m_vdencEnabled && (m_16xMeSupported || m_staticFrameDetectionInUse);
1716 surfaceParam.meVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1717 surfaceParam.vdencStreamInSurfaceSize = MOS_BYTES_TO_DWORDS(m_picHeightInMb * m_picWidthInMb * 64);
1718
1719 if (m_hmeKernel->Is16xMeEnabled())
1720 {
1721 m_lastTaskInPhase = false;
1722 if (m_hmeKernel->Is32xMeEnabled())
1723 {
1724 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb32x;
1725 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
1726 surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
1727 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x));
1728 }
1729 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb16x;
1730 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
1731 surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
1732 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x));
1733 }
1734
1735 // On-demand sync for VDEnc SHME StreamIn surface
1736 auto syncParams = g_cInitSyncParams;
1737 syncParams.GpuContext = m_renderContext;
1738 syncParams.presSyncResource = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1739
1740 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
1741 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
1742
1743 // HME StreamIn
1744 m_lastTaskInPhase = !m_staticFrameDetectionInUse;
1745
1746 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb4x;
1747 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb4x;
1748 surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset;
1749 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x));
1750 m_vdencStreamInEnabled = true;
1751 }
1752 return MOS_STATUS_SUCCESS;
1753 }
1754
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)1755 MOS_STATUS CodechalVdencAvcStateG11::UpdateCmdBufAttribute(
1756 PMOS_COMMAND_BUFFER cmdBuffer,
1757 bool renderEngineInUse)
1758 {
1759 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1760
1761 // should not be there. Will remove it in the next change
1762 CODECHAL_ENCODE_FUNCTION_ENTER;
1763 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
1764 {
1765 PMOS_CMD_BUF_ATTRI_VE attriExt =
1766 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
1767
1768 memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
1769 attriExt->bUseVirtualEngineHint =
1770 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
1771 }
1772
1773 return eStatus;
1774 }
1775
1776 #if USE_CODECHAL_DEBUG_TOOL
PopulateBrcInitParam(void * cmd)1777 MOS_STATUS CodechalVdencAvcStateG11::PopulateBrcInitParam(
1778 void *cmd)
1779 {
1780 CODECHAL_DEBUG_FUNCTION_ENTER;
1781
1782 CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1783
1784 if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1785 {
1786 return MOS_STATUS_SUCCESS;
1787 }
1788
1789 BrcInitDmem * dmem = (BrcInitDmem *)cmd;
1790
1791 if (m_pictureCodingType == I_TYPE)
1792 {
1793 m_avcPar->MBBRCEnable = m_mbBrcEnabled;
1794 m_avcPar->MBRC = m_mbBrcEnabled;
1795 m_avcPar->BitRate = dmem->INIT_TargetBitrate_U32;
1796 m_avcPar->InitVbvFullnessInBit = dmem->INIT_InitBufFull_U32;
1797 m_avcPar->MaxBitRate = dmem->INIT_MaxRate_U32;
1798 m_avcPar->VbvSzInBit = dmem->INIT_BufSize_U32;
1799 m_avcPar->UserMaxFrame = dmem->INIT_ProfileLevelMaxFrame_U32;
1800 m_avcPar->SlidingWindowEnable = dmem->INIT_SlidingWidowRCEnable_U8;
1801 m_avcPar->SlidingWindowSize = dmem->INIT_SlidingWindowSize_U8;
1802 m_avcPar->SlidingWindowMaxRateRatio = dmem->INIT_SlidingWindowMaxRateRatio_U8;
1803 m_avcPar->LowDelayGoldenFrameBoost = dmem->INIT_LowDelayGoldenFrameBoost_U8;
1804 m_avcPar->TopQPDeltaThrforAdaptive2Pass = dmem->INIT_TopQPDeltaThrForAdapt2Pass_U8;
1805 m_avcPar->BotQPDeltaThrforAdaptive2Pass = dmem->INIT_BotQPDeltaThrForAdapt2Pass_U8;
1806 m_avcPar->TopFrmSzPctThrforAdaptive2Pass = dmem->INIT_TopFrmSzThrForAdapt2Pass_U8;
1807 m_avcPar->BotFrmSzPctThrforAdaptive2Pass = dmem->INIT_BotFrmSzThrForAdapt2Pass_U8;
1808 m_avcPar->MBHeaderCompensation = dmem->INIT_MBHeaderCompensation_U8;
1809 m_avcPar->QPSelectMethodforFirstPass = dmem->INIT_QPSelectForFirstPass_U8;
1810 m_avcPar->MBQpCtrl = (dmem->INIT_MbQpCtrl_U8 > 0) ? true : false;
1811 m_avcPar->QPMax = dmem->INIT_MaxQP_U16;
1812 m_avcPar->QPMin = dmem->INIT_MinQP_U16;
1813 m_avcPar->HrdConformanceCheckDisable = (dmem->INIT_HRDConformanceCheckDisable_U8 > 0) ? true : false;
1814 m_avcPar->ICQReEncode = (dmem->INIT_ICQReEncode_U8 > 0) ? true : false;
1815 m_avcPar->AdaptiveCostAdjustEnable = (dmem->INIT_AdaptiveCostEnable_U8 > 0) ? true : false;
1816 m_avcPar->AdaptiveHMEExtension = (dmem->INIT_AdaptiveHMEExtensionEnable_U8 > 0) ? true : false;
1817 m_avcPar->StreamInStaticRegion = dmem->INIT_StaticRegionStreamIn_U8;
1818 ;
1819 m_avcPar->ScenarioInfo = dmem->INIT_ScenarioInfo_U8;
1820 ;
1821 }
1822
1823 return MOS_STATUS_SUCCESS;
1824 }
1825
PopulateBrcUpdateParam(void * cmd)1826 MOS_STATUS CodechalVdencAvcStateG11::PopulateBrcUpdateParam(
1827 void *cmd)
1828 {
1829 CODECHAL_DEBUG_FUNCTION_ENTER;
1830
1831 CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1832
1833 if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1834 {
1835 return MOS_STATUS_SUCCESS;
1836 }
1837
1838 BrcUpdateDmem * dmem = (BrcUpdateDmem *)cmd;
1839
1840 if (m_pictureCodingType == I_TYPE)
1841 {
1842 m_avcPar->EnableMultipass = (dmem->UPD_MaxNumPass_U8 > 0) ? true : false;
1843 m_avcPar->MaxNumPakPasses = dmem->UPD_MaxNumPass_U8;
1844 m_avcPar->SceneChgDetectEn = (dmem->UPD_SceneChgDetectEn_U8 > 0) ? true : false;
1845 m_avcPar->SceneChgPrevIntraPctThresh = dmem->UPD_SceneChgPrevIntraPctThreshold_U8;
1846 m_avcPar->SceneChgCurIntraPctThresh = dmem->UPD_SceneChgCurIntraPctThreshold_U8;
1847 m_avcPar->SceneChgWidth0 = dmem->UPD_SceneChgWidth_U8[0];
1848 m_avcPar->SceneChgWidth1 = dmem->UPD_SceneChgWidth_U8[1];
1849 m_avcPar->SliceSizeThr = dmem->UPD_SLCSZ_TARGETSLCSZ_U16;
1850 m_avcPar->SliceMaxSize = dmem->UPD_TargetSliceSize_U16;
1851 }
1852 else if (m_pictureCodingType == P_TYPE)
1853 {
1854 m_avcPar->Transform8x8PDisable = (dmem->UPD_DisablePFrame8x8Transform_U8 > 0) ? true : false;
1855 }
1856
1857 return MOS_STATUS_SUCCESS;
1858 }
1859
PopulateEncParam(uint8_t meMethod,void * cmd)1860 MOS_STATUS CodechalVdencAvcStateG11::PopulateEncParam(
1861 uint8_t meMethod,
1862 void *cmd)
1863 {
1864 CODECHAL_DEBUG_FUNCTION_ENTER;
1865
1866 CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1867
1868 if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1869 {
1870 return MOS_STATUS_SUCCESS;
1871 }
1872
1873 uint8_t *data = nullptr;
1874 MOS_LOCK_PARAMS lockFlags;
1875 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1876 lockFlags.ReadOnly = 1;
1877
1878 if (m_vdencBrcEnabled)
1879 {
1880 // BRC case: VDENC IMG STATE is updated by HuC FW
1881 data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx], &lockFlags);
1882 data = data + mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD::byteSize;
1883 }
1884 else
1885 {
1886 // CQP case: VDENC IMG STATE is updated by driver or SFD kernel
1887 if (!m_staticFrameDetectionInUse)
1888 {
1889 data = m_batchBufferForVdencImgStat[m_currRecycledBufIdx].pData;
1890 data = data + mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD::byteSize;
1891 }
1892 else
1893 {
1894 data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resVdencSfdImageStateReadBuffer, &lockFlags);
1895 }
1896 }
1897
1898 CODECHAL_DEBUG_CHK_NULL(data);
1899
1900 mhw_vdbox_vdenc_g11_X::VDENC_IMG_STATE_CMD vdencCmd;
1901 vdencCmd = *(mhw_vdbox_vdenc_g11_X::VDENC_IMG_STATE_CMD *)(data);
1902
1903 if (m_pictureCodingType == I_TYPE)
1904 {
1905 m_avcPar->BlockBasedSkip = vdencCmd.DW4.BlockBasedSkipEnabled;
1906 m_avcPar->VDEncPerfMode = vdencCmd.DW1.VdencPerfmode;
1907 }
1908 else if (m_pictureCodingType == P_TYPE)
1909 {
1910 m_avcPar->SubPelMode = vdencCmd.DW4.SubPelMode;
1911 m_avcPar->FTQBasedSkip = vdencCmd.DW4.ForwardTransformSkipCheckEnable;
1912 m_avcPar->BiMixDisable = vdencCmd.DW1.BidirectionalMixDisable;
1913 m_avcPar->SurvivedSkipCost = (vdencCmd.DW8.NonSkipZeroMvCostAdded << 1) + vdencCmd.DW8.NonSkipMbModeCostAdded;
1914 m_avcPar->UniMixDisable = vdencCmd.DW2.UnidirectionalMixDisable;
1915 m_avcPar->VdencExtPakObjDisable = !vdencCmd.DW1.VdencExtendedPakObjCmdEnable;
1916 m_avcPar->PPMVDisable = vdencCmd.DW34.PpmvDisable;
1917 }
1918
1919 if (data)
1920 {
1921 if (m_vdencBrcEnabled)
1922 {
1923 m_osInterface->pfnUnlockResource(
1924 m_osInterface,
1925 &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx]);
1926 }
1927 else
1928 {
1929 if (m_staticFrameDetectionInUse)
1930 {
1931 m_osInterface->pfnUnlockResource(
1932 m_osInterface,
1933 &m_resVdencSfdImageStateReadBuffer);
1934 }
1935 }
1936 }
1937
1938 return MOS_STATUS_SUCCESS;
1939 }
1940
PopulatePakParam(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER secondLevelBatchBuffer)1941 MOS_STATUS CodechalVdencAvcStateG11::PopulatePakParam(
1942 PMOS_COMMAND_BUFFER cmdBuffer,
1943 PMHW_BATCH_BUFFER secondLevelBatchBuffer)
1944 {
1945 CODECHAL_DEBUG_FUNCTION_ENTER;
1946
1947 CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1948
1949 if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1950 {
1951 return MOS_STATUS_SUCCESS;
1952 }
1953
1954 uint8_t *data = nullptr;
1955 MOS_LOCK_PARAMS lockFlags;
1956 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1957 lockFlags.ReadOnly = 1;
1958
1959 if (cmdBuffer != nullptr)
1960 {
1961 data = (uint8_t*)(cmdBuffer->pCmdPtr - (mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD::byteSize / sizeof(uint32_t)));
1962 }
1963 else if (secondLevelBatchBuffer != nullptr)
1964 {
1965 data = secondLevelBatchBuffer->pData;
1966 }
1967 else
1968 {
1969 data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx], &lockFlags);
1970 }
1971
1972 CODECHAL_DEBUG_CHK_NULL(data);
1973
1974 mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD mfxCmd;
1975 mfxCmd = *(mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD *)(data);
1976
1977 if (m_pictureCodingType == I_TYPE)
1978 {
1979 m_avcPar->TrellisQuantizationEnable = mfxCmd.DW5.TrellisQuantizationEnabledTqenb;
1980 m_avcPar->EnableAdaptiveTrellisQuantization = mfxCmd.DW5.TrellisQuantizationEnabledTqenb;
1981 m_avcPar->TrellisQuantizationRounding = mfxCmd.DW5.TrellisQuantizationRoundingTqr;
1982 m_avcPar->TrellisQuantizationChromaDisable = mfxCmd.DW5.TrellisQuantizationChromaDisableTqchromadisable;
1983 m_avcPar->ExtendedRhoDomainEn = mfxCmd.DW17.ExtendedRhodomainStatisticsEnable;
1984 }
1985
1986 if (data && (cmdBuffer == nullptr) && (secondLevelBatchBuffer == nullptr))
1987 {
1988 m_osInterface->pfnUnlockResource(
1989 m_osInterface,
1990 &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx]);
1991 }
1992
1993 return MOS_STATUS_SUCCESS;
1994 }
1995 #endif
1996