1 /*
2 * Copyright (c) 2017-2023, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_vdenc_avc_g11.cpp
//! \brief    This file implements the C++ class/interface for Gen11 platform's AVC
//!           VDEnc encoding to be used across CODECHAL components.
26 //!
27 
28 #include "codechal_vdenc_avc_g11.h"
29 #include "codechal_kernel_header_g11.h"
30 #include "codechal_kernel_hme_g11.h"
31 #include "mhw_vdbox_vdenc_g11_X.h"
32 #include "mhw_vdbox_g11_X.h"
33 #include "hal_oca_interface.h"
34 #include "mos_util_user_interface.h"
35 #if defined(ENABLE_KERNELS)
36 #include "igcodeckrn_g11.h"
37 #endif
38 #if USE_CODECHAL_DEBUG_TOOL
39 #include "codechal_debug_encode_par_g11.h"
40 #include "mhw_vdbox_mfx_hwcmd_g11_X.h"
41 #include "mhw_vdbox_vdenc_hwcmd_g11_X.h"
42 #endif
43 
44 struct CodechalVdencAvcStateG11::KernelHeader
45 {
46     int m_kernelCount;
47     // Quality mode for Frame/Field
48     CODECHAL_KERNEL_HEADER m_mbEncQltyI;
49     CODECHAL_KERNEL_HEADER m_mbEncQltyP;
50     CODECHAL_KERNEL_HEADER m_mbEncQltyB;
51     // Normal mode for Frame/Field
52     CODECHAL_KERNEL_HEADER m_mbEncNormI;
53     CODECHAL_KERNEL_HEADER m_mbEncNormP;
54     CODECHAL_KERNEL_HEADER m_mbEncNormB;
55     // Performance modes for Frame/Field
56     CODECHAL_KERNEL_HEADER m_mbEncPerfI;
57     CODECHAL_KERNEL_HEADER m_mbEncPerfP;
58     CODECHAL_KERNEL_HEADER m_mbEncPerfB;
59     // Modes for Frame/Field
60     CODECHAL_KERNEL_HEADER m_mbEncAdvI;
61     CODECHAL_KERNEL_HEADER m_mbEncAdvP;
62     CODECHAL_KERNEL_HEADER m_mbEncAdvB;
63 
64     // BRC init frame
65     CODECHAL_KERNEL_HEADER m_initFrameBrc;
66     // Frame BRC update
67     CODECHAL_KERNEL_HEADER m_frameEncUpdate;
68     // BRC Reset frame
69     CODECHAL_KERNEL_HEADER m_brcResetFrame;
70     // BRC I Frame Distortion
71     CODECHAL_KERNEL_HEADER m_brcIFrameDist;
72     // RRCBlockCopy
73     CODECHAL_KERNEL_HEADER m_brcBlockCopy;
74     // MbBRC Update
75     CODECHAL_KERNEL_HEADER m_mbBrcUpdate;
76     // 2x DownScaling
77     //Weighted Prediction Kernel
78     CODECHAL_KERNEL_HEADER m_weightedPrediction;
79     // SW scoreboard initialization kernel
80     CODECHAL_KERNEL_HEADER m_initSWScoreboard;
81 
82 };
83 
84 struct CodechalVdencAvcStateG11::BrcInitDmem
85 {
86     uint8_t     BRCFunc_U8;                           // 0: Init; 2: Reset
87     uint8_t     OpenSourceEnable_U8;                  // 0: disable opensource, 1: enable opensource
88     uint8_t     RVSD[2];
89     uint16_t    INIT_BRCFlag_U16;                     // ICQ or CQP with slice size control: 0x00 CBR: 0x10; VBR: 0x20; VCM: 0x40; LOWDELAY: 0x80.
90     uint16_t    Reserved;
91     uint16_t    INIT_FrameWidth_U16;                  // Luma width in bytes
92     uint16_t    INIT_FrameHeight_U16;                 // Luma height in bytes
93     uint32_t    INIT_TargetBitrate_U32;               // target bitrate, set by application
94     uint32_t    INIT_MinRate_U32;                     // 0
95     uint32_t    INIT_MaxRate_U32;                     // Maximum bit rate in bits per second (bps).
96     uint32_t    INIT_BufSize_U32;                     // buffer size
97     uint32_t    INIT_InitBufFull_U32;                 // initial buffer fullness
98     uint32_t    INIT_ProfileLevelMaxFrame_U32;        // user defined. refer to AVC BRC for conformance check and correction
99     uint32_t    INIT_FrameRateM_U32;                  // FrameRateM is the number of frames in FrameRateD
100     uint32_t    INIT_FrameRateD_U32;                  // If driver gets this FrameRateD from VUI, it is the num_units_in_tick field (32 bits unsigned integer).
101     uint16_t    INIT_GopP_U16;                        // number of P frames in a GOP
102     uint16_t    INIT_GopB_U16;                        // number of B frames in a GOP
103     uint16_t    INIT_MinQP_U16;                       // 10
104     uint16_t    INIT_MaxQP_U16;                       // 51
105     int8_t      INIT_DevThreshPB0_S8[8];              // lowdelay ? (-45, -33, -23, -15, -8, 0, 15, 25) : (-46, -38, -30, -23, 23, 30, 40, 46)
106     int8_t      INIT_DevThreshVBR0_S8[8];             // lowdelay ? (-45, -35, -25, -15, -8, 0, 20, 40) : (-46, -40, -32, -23, 56, 64, 83, 93)
107     int8_t      INIT_DevThreshI0_S8[8];               // lowdelay ? (-40, -30, -17, -10, -5, 0, 10, 20) : (-43, -36, -25, -18, 18, 28, 38, 46)
108     uint8_t     INIT_InitQPIP;                        // Initial QP for I and P
109 
110     uint8_t     INIT_NotUseRhoDm_U8;                  // Reserved
111     uint8_t     INIT_InitQPB;                         // Initial QP for B
112     uint8_t     INIT_MbQpCtrl_U8;                     // Enable MB level QP control (global)
113     uint8_t     INIT_SliceSizeCtrlEn_U8;              // Enable slice size control
114     int8_t      INIT_IntraQPDelta_I8[3];              // set to zero for all by default
115     int8_t      INIT_SkipQPDelta_I8;                  // Reserved
116     int8_t      INIT_DistQPDelta_I8[4];               // lowdelay ? (-5, -2, 2, 5) : (0, 0, 0, 0)
117     uint8_t     INIT_OscillationQpDelta_U8;           // BRCFLAG_ISVCM ? 16 : 0
118     uint8_t     INIT_HRDConformanceCheckDisable_U8;   // BRCFLAG_ISAVBR ? 1 : 0
119     uint8_t     INIT_SkipFrameEnableFlag;
120     uint8_t     INIT_TopQPDeltaThrForAdapt2Pass_U8;   // =1. QP Delta threshold for second pass.
121     uint8_t     INIT_TopFrmSzThrForAdapt2Pass_U8;     // lowdelay ? 10 : 50. Top frame size threshold for second pass
122     uint8_t     INIT_BotFrmSzThrForAdapt2Pass_U8;     // lowdelay ? 10 : 200. Bottom frame size threshold for second pass
123     uint8_t     INIT_QPSelectForFirstPass_U8;         // lowdelay ? 0 : 1. =0 to use previous frame final QP; or =1 to use (targetQP + previousQP) / 2.
124     uint8_t     INIT_MBHeaderCompensation_U8;         // Reserved
125     uint8_t     INIT_OverShootCarryFlag_U8;           // set to zero by default
126     uint8_t     INIT_OverShootSkipFramePct_U8;        // set to zero by default
127     uint8_t     INIT_EstRateThreshP0_U8[7];           // 4, 8, 12, 16, 20, 24, 28
128     uint8_t     INIT_EstRateThreshB0_U8[7];           // 4, 8, 12, 16, 20, 24, 28
129     uint8_t     INIT_EstRateThreshI0_U8[7];           // 4, 8, 12, 16, 20, 24, 28
130     uint8_t     INIT_FracQPEnable_U8;                 // ExtendedRhoDomainEn from par file
131     uint8_t     INIT_ScenarioInfo_U8;                 // 0: UNKNOWN, 1: DISPLAYREMOTING, 2: VIDEOCONFERENCE, 3: ARCHIVE, 4: LIVESTREAMING.
132     uint8_t     INIT_StaticRegionStreamIn_U8;         // should be programmed from par file
133     uint8_t     INIT_DeltaQP_Adaptation_U8;           // =1, should be programmed from par file
134     uint8_t     INIT_MaxCRFQualityFactor_U8;          // =52, should be programmed from par file
135     uint8_t     INIT_CRFQualityFactor_U8;             // =25, should be programmed from par file
136     uint8_t     INIT_BotQPDeltaThrForAdapt2Pass_U8;   // =1. QP Delta threshold for second pass.
137     uint8_t     INIT_SlidingWindowSize_U8;            // =30, the window size (in frames) used to compute bit rate
138     uint8_t     INIT_SlidingWidowRCEnable_U8;         // =0, sliding window based rate control (SWRC) disabled, 1: enabled
139     uint8_t     INIT_SlidingWindowMaxRateRatio_U8;    // =120, ratio between the max rate within the window and average target bitrate
140     uint8_t     INIT_LowDelayGoldenFrameBoost_U8;     // only for lowdelay mode, 0 (default): no boost for I and scene change frames, 1: boost
141     uint8_t     INIT_AdaptiveCostEnable_U8;           // 0: disabled, 1: enabled
142     uint8_t     INIT_AdaptiveHMEExtensionEnable_U8;   // 0: disabled, 1: enabled
143     uint8_t     INIT_ICQReEncode_U8;                  // 0: disabled, 1: enabled
144     uint8_t     INIT_LookaheadDepth_U8;               // Lookahead depth in unit of frames [0, 127]
145     uint8_t     INIT_SinglePassOnly;                  // 0: disabled, 1: enabled
146     uint8_t     INIT_New_DeltaQP_Adaptation_U8;       // = 1 to enable new delta QP adaption
147     uint8_t     RSVD2[55];                            // must be zero
148 };
149 
150 struct CodechalVdencAvcStateG11::BrcUpdateDmem
151 {
152     uint8_t     BRCFunc_U8;                           // =1 for Update, other values are reserved for future use
153     uint8_t     RSVD[3];
154     uint32_t    UPD_TARGETSIZE_U32;                   // refer to AVC BRC for calculation
155     uint32_t    UPD_FRAMENUM_U32;                     // frame number
156     uint32_t    UPD_PeakTxBitsPerFrame_U32;           // current global target bits - previous global target bits (global target bits += input bits per frame)
157     uint32_t    UPD_FrameBudget_U32;                  // target time counter
158     uint32_t    FrameByteCount;                       // PAK output via MMIO
159     uint32_t    TimingBudgetOverflow;                 // PAK output via MMIO
160     uint32_t    ImgStatusCtrl;                        // PAK output via MMIO
161     uint32_t    IPCMNonConformant;                    // PAK output via MMIO
162 
163     uint16_t    UPD_startGAdjFrame_U16[4];            // 10, 50, 100, 150
164     uint16_t    UPD_MBBudget_U16[52];                 // MB bugdet for QP 0 � 51.
165     uint16_t    UPD_SLCSZ_TARGETSLCSZ_U16;            // target slice size
166     uint16_t    UPD_SLCSZ_UPD_THRDELTAI_U16[42];      // slice size threshold delta for I frame
167     uint16_t    UPD_SLCSZ_UPD_THRDELTAP_U16[42];      // slice size threshold delta for P frame
168     uint16_t    UPD_NumOfFramesSkipped_U16;           // Recording how many frames have been skipped.
169     uint16_t    UPD_SkipFrameSize_U16;                 // Recording the skip frame size for one frame. =NumMBs * 1, assuming one bit per mb for skip frame.
170     uint16_t    UPD_StaticRegionPct_U16;              // One entry, recording the percentage of static region
171     uint8_t     UPD_gRateRatioThreshold_U8[7];        // 80,95,99,101,105,125,160
172     uint8_t     UPD_CurrFrameType_U8;                 // I frame: 2; P frame: 0; B frame: 1.
173     uint8_t     UPD_startGAdjMult_U8[5];              // 1, 1, 3, 2, 1
174     uint8_t     UPD_startGAdjDiv_U8[5];               // 40, 5, 5, 3, 1
175     uint8_t     UPD_gRateRatioThresholdQP_U8[8];      // 253,254,255,0,1,1,2,3
176     uint8_t     UPD_PAKPassNum_U8;                    // current pak pass number
177     uint8_t     UPD_MaxNumPass_U8;                    // 2
178     uint8_t     UPD_SceneChgWidth_U8[2];              // set both to MIN((NumP + 1) / 5, 6)
179     uint8_t     UPD_SceneChgDetectEn_U8;              // Enable scene change detection
180     uint8_t     UPD_SceneChgPrevIntraPctThreshold_U8; // =96. scene change previous intra percentage threshold
181     uint8_t     UPD_SceneChgCurIntraPctThreshold_U8;  // =192. scene change current intra percentage threshold
182     uint8_t     UPD_IPAverageCoeff_U8;                // lowdelay ? 0 : 128
183     uint8_t     UPD_MinQpAdjustment_U8;               // Minimum QP increase step
184     uint8_t     UPD_TimingBudgetCheck_U8;             // Flag indicating if kernel will check timing budget.
185     int8_t      reserved_I8[4];                       // must be zero
186     uint8_t     UPD_CQP_QpValue_U8;                   // Application specified target QP in BRC_ICQ mode
187     uint8_t     UPD_CQP_FracQp_U8;                    // Application specified fine position in BRC_ICQ mode
188     uint8_t     UPD_HMEDetectionEnable_U8;            // 0: default, 1: HuC BRC kernel requires information from HME detection kernel output
189     uint8_t     UPD_HMECostEnable_U8;                 // 0: default, 1: driver provides HME cost table
190     uint8_t     UPD_DisablePFrame8x8Transform_U8;     // 0: enable, 1: disable
191     uint8_t     RSVD3;                                // must be zero
192     uint8_t     UPD_ROISource_U8;                     // =0: disable, 1: ROIMap from HME Static Region or from App dirty rectangle, 2: ROIMap from App
193     uint8_t     RSVD4;                                // must be zero
194     uint16_t    UPD_TargetSliceSize_U16;              // default: 1498, max target slice size from app DDI
195     uint16_t    UPD_MaxNumSliceAllowed_U16;           // computed by driver based on level idc
196     uint16_t    UPD_SLBB_Size_U16;                    // second level batch buffer (SLBB) size in bytes, the input buffer will contain two SLBBs A and B, A followed by B, A and B have the same structure.
197     uint16_t    UPD_SLBB_B_Offset_U16;                // offset in bytes from the beginning of the input buffer, it points to the start of SLBB B, set by driver for skip frame support
198     uint16_t    UPD_AvcImgStateOffset_U16;            // offset in bytes from the beginning of SLBB A
199     uint16_t    reserved_u16;
200     uint32_t    NumOfSlice;                           // PAK output via MMIO
201 
202                                                     /* HME distortion based QP adjustment */
203     uint16_t    AveHmeDist_U16;                       // default: 0, in HME detection kernel output
204     uint8_t     HmeDistAvailable_U8;                  // 0: disabled, 1: enabled
205     uint8_t     DisableDMA;                           // default =0, use DMA data transfer; =1, use regular region read/write
206     uint16_t    AdditionalFrameSize_U16;              // for slice size control improvement
207     uint8_t     AddNALHeaderSizeInternally_U8;
208     uint8_t     UPD_RoiQpViaForceQp_U8;               // HuC does not update StreamIn Buffer, 1: HuC updates StreamIn Buffer
209     uint32_t    CABACZeroInsertionSize_U32;           // PAK output via MMIO
210     uint32_t    MiniFramePaddingSize_U32;             // PAK output via MMIO
211     uint16_t    UPD_WidthInMB_U16;                    // width in MB
212     uint16_t    UPD_HeightInMB_U16;                   // height in MB
213     int8_t      UPD_ROIQpDelta_I8[8];                 // Application specified ROI QP Adjustment for Zone0, Zone1, Zone2 and Zone3, Zone4, Zone5, Zone6 and Zone7.
214 
215     //HME--Offset values need to be a multiple of 4 in order to be aligned to the 4x4 HME block for downscaled 4X HME precision and HME--Offset range is [-128,127]
216     int8_t       HME0XOffset_I8;    // default = 32, Frame level X offset from the co-located (0, 0) location for HME0.
217     int8_t       HME0YOffset_I8;    // default = 24, Frame level Y offset from the co-located (0, 0) location for HME0.
218     int8_t       HME1XOffset_I8;    // default = -32, Frame level X offset from the co-located (0, 0) location for HME1.
219     int8_t       HME1YOffset_I8;    // default = -24, Frame level Y offset from the co-located (0, 0) location for HME1.
220     uint8_t      MOTION_ADAPTIVE_G4;
221     uint8_t      EnableLookAhead;
222     uint8_t      UPD_LA_Data_Offset_U8;
223     uint8_t      UPD_CQMEnabled_U8;  // 0 indicates CQM is disabled for current frame; otherwise CQM is enabled.
224     uint32_t     UPD_LA_TargetSize_U32;     // target frame size in lookahead BRC (if EnableLookAhead == 1) or TCBRC mode. If zero, lookahead BRC or TCBRC is disabled.
225     uint32_t     UPD_LA_TargetFulness_U32;  // target VBV buffer fulness in lookahead BRC mode (if EnableLookAhead == 1).
226     uint8_t      UPD_Delta_U8;              // delta QP of pyramid
227     uint8_t      UPD_ROM_CURRENT_U8;        // ROM average of current frame
228     uint8_t      UPD_ROM_ZERO_U8;           // ROM zero percentage (255 is 100%)
229     uint8_t      UPD_TCBRC_SCENARIO_U8;
230     uint8_t      RSVD2[12];
231 };
232 
233 // CURBE for Static Frame Detection kernel
234 class CodechalVdencAvcStateG11::SfdCurbe
235 {
236 public:
237     union
238     {
239         struct
240         {
241             uint32_t   VDEncModeDisable                    : MOS_BITFIELD_BIT(0);
242             uint32_t   BRCModeEnable                       : MOS_BITFIELD_BIT(1);
243             uint32_t   SliceType                           : MOS_BITFIELD_RANGE(2, 3);
244             uint32_t                                       : MOS_BITFIELD_BIT(4);
245             uint32_t   StreamInType                        : MOS_BITFIELD_RANGE(5, 8);
246             uint32_t   EnableAdaptiveMvStreamIn            : MOS_BITFIELD_BIT(9);
247             uint32_t                                       : MOS_BITFIELD_BIT(10);
248             uint32_t   EnableIntraCostScalingForStaticFrame: MOS_BITFIELD_BIT(11);
249             uint32_t   Reserved                            : MOS_BITFIELD_RANGE(12, 31);
250         };
251         struct
252         {
253             uint32_t   Value;
254         };
255     } m_dw0;
256 
257     union
258     {
259         struct
260         {
261             uint32_t   QPValue                             : MOS_BITFIELD_RANGE(0, 7);
262             uint32_t   NumOfRefs                           : MOS_BITFIELD_RANGE(8, 15);
263             uint32_t   HMEStreamInRefCost                  : MOS_BITFIELD_RANGE(16, 23);
264             uint32_t   Reserved                            : MOS_BITFIELD_RANGE(24, 31);
265         };
266         struct
267         {
268             uint32_t   Value;
269         };
270     } m_dw1;
271 
272     union
273     {
274         struct
275         {
276             uint32_t   FrameWidthInMBs                     : MOS_BITFIELD_RANGE(0, 15);     // round-up to 4-MB aligned
277             uint32_t   FrameHeightInMBs                    : MOS_BITFIELD_RANGE(16, 31);     // round-up to 4-MB aligned
278         };
279         struct
280         {
281             uint32_t   Value;
282         };
283     } m_dw2;
284 
285     union
286     {
287         struct
288         {
289             uint32_t   LargeMvThresh                       : MOS_BITFIELD_RANGE(0, 31);
290         };
291         struct
292         {
293             uint32_t   Value;
294         };
295     } m_dw3;
296 
297     union
298     {
299         struct
300         {
301             uint32_t   TotalLargeMvThreshold               : MOS_BITFIELD_RANGE(0, 31);
302         };
303         struct
304         {
305             uint32_t   Value;
306         };
307     } m_dw4;
308 
309     union
310     {
311         struct
312         {
313             uint32_t   ZMVThreshold                        : MOS_BITFIELD_RANGE(0, 31);
314         };
315         struct
316         {
317             uint32_t   Value;
318         };
319     } m_dw5;
320 
321     union
322     {
323         struct
324         {
325             uint32_t   TotalZMVThreshold                   : MOS_BITFIELD_RANGE(0, 31);
326         };
327         struct
328         {
329             uint32_t   Value;
330         };
331     } m_dw6;
332 
333     union
334     {
335         struct
336         {
337             uint32_t   MinDistThreshold                    : MOS_BITFIELD_RANGE(0, 31);
338         };
339         struct
340         {
341             uint32_t   Value;
342         };
343     } m_dw7;
344 
345     uint8_t m_costTable[52];
346 
347     union
348     {
349         struct
350         {
351             uint32_t   ActualWidthInMB                     : MOS_BITFIELD_RANGE(0, 15);
352             uint32_t   ActualHeightInMB                    : MOS_BITFIELD_RANGE(16, 31);
353         };
354         struct
355         {
356             uint32_t   Value;
357         };
358     } m_dw21;
359 
360     union
361     {
362         struct
363         {
364             uint32_t   Reserved                            : MOS_BITFIELD_RANGE(0, 31);
365         };
366         struct
367         {
368             uint32_t   Value;
369         };
370     } m_dw22;
371 
372     union
373     {
374         struct
375         {
376             uint32_t   Reserved                            : MOS_BITFIELD_RANGE(0, 31);
377         };
378         struct
379         {
380             uint32_t   Value;
381         };
382     } m_dw23;
383 
384     union
385     {
386         struct
387         {
388             uint32_t   VDEncInputImagStateIndex            : MOS_BITFIELD_RANGE(0, 31);      // used in VDEnc CQP mode
389         };
390         struct
391         {
392             uint32_t   Value;
393         };
394     } m_dw24;
395 
396     union
397     {
398         struct
399         {
400             uint32_t   Reserved                            : MOS_BITFIELD_RANGE(0, 31);
401         };
402         struct
403         {
404             uint32_t   Value;
405         };
406     } m_dw25;
407 
408     union
409     {
410         struct
411         {
412             uint32_t   MVDataSurfaceIndex                  : MOS_BITFIELD_RANGE(0, 31);      // contains HME MV Data generated by HME kernel
413         };
414         struct
415         {
416             uint32_t   Value;
417         };
418     } m_dw26;
419 
420     union
421     {
422         struct
423         {
424             uint32_t   InterDistortionSurfaceIndex         : MOS_BITFIELD_RANGE(0, 31);      // contains HME Inter Distortion generated by HME kernel
425         };
426         struct
427         {
428             uint32_t   Value;
429         };
430     } m_dw27;
431 
432     union
433     {
434         struct
435         {
436             uint32_t   OutputDataSurfaceIndex              : MOS_BITFIELD_RANGE(0, 31);
437         };
438         struct
439         {
440             uint32_t   Value;
441         };
442     } m_dw28;
443 
444     union
445     {
446         struct
447         {
448             uint32_t   VDEncOutputImagStateIndex           : MOS_BITFIELD_RANGE(0, 31);
449         };
450         struct
451         {
452             uint32_t   Value;
453         };
454     } m_dw29;
455 
SfdCurbe()456     SfdCurbe()
457     {
458         m_dw0.Value  = 0;
459         m_dw1.Value  = 0;
460         m_dw2.Value  = 0;
461         m_dw3.Value  = 0;
462         m_dw4.Value  = 0;
463         m_dw5.Value  = 0;
464         m_dw6.Value  = 0;
465         m_dw7.Value  = 0;
466         m_dw21.Value = 0;
467         m_dw22.Value = 0;
468         m_dw23.Value = 0;
469         m_dw24.Value = 0;
470         m_dw25.Value = 0;
471         m_dw26.Value = 0;
472         m_dw27.Value = 0;
473         m_dw28.Value = 0;
474         m_dw29.Value = 0;
475 
476         for (uint8_t i = 0; i < 52; i++)
477         {
478             m_costTable[i] = 0;
479         }
480     };
481 };
482 
//!
//! \brief  Binding table slots of the Static Frame Detection kernel.
//!         sfdNumSurfaces is the number of slots, not a slot itself.
//!
enum SfdBindingTableOffset
{
    sfdVdencInputImageState   = 0,  // VDEnc input image state
    sfdMvDataSurface          = 1,  // MV data surface
    sfdInterDistortionSurface = 2,  // inter distortion surface
    sfdOutputDataSurface      = 3,  // output data surface
    sfdVdencOutputImageState  = 4,  // VDEnc output image state
    sfdNumSurfaces            = 5   // total count of the entries above
};
492 
493 const uint32_t CodechalVdencAvcStateG11::m_mvCostSkipBiasQPel[3][8] =
494 {
495     // for normal case
496     { 0, 6, 6, 9, 10, 13, 14, 16 },
497     // for QP = 47,48,49
498     { 0, 6, 6, 6, 6, 7, 8, 8 },
499     // for QP = 50,51
500     { 0, 6, 6, 6, 6, 7, 7, 7 }
501 };
502 
503 const uint32_t CodechalVdencAvcStateG11::m_hmeCostDisplayRemote[8][CODEC_AVC_NUM_QP] =
504 {
505     //mv=0
506     {
507         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
508         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
509         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
510         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0      //QP=[39 ~51]
511     },
512     //mv<=16
513     {
514         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
515         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
516         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
517         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0      //QP=[39 ~51]
518     },
519     //mv<=32
520     {
521         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
522         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
523         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
524         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1      //QP=[39 ~51]
525     },
526     //mv<=64
527     {
528         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
529         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
530         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
531         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5      //QP=[39 ~51]
532     },
533     //mv<=128
534     {
535         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
536         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
537         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
538         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10      //QP=[39 ~51]
539     },
540     //mv<=256
541     {
542         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
543         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
544         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
545         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10      //QP=[39 ~51]
546     },
547     //mv<=512
548     {
549         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
550         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
551         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
552         20, 20, 20, 20, 20, 30, 30, 30, 30, 30, 30, 30, 30      //QP=[39 ~51]
553     },
554     //mv<=1024
555     {
556         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
557         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
558         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
559         20, 20, 20, 30, 40, 50, 50, 50, 50, 50, 50, 50, 50      //QP=[39 ~51]
560     }
561 };
562 
563 const uint32_t CodechalVdencAvcStateG11::m_hmeCost[8][CODEC_AVC_NUM_QP] =
564 {
565     //mv=0
566     { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       //QP=[0 ~12]
567     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         //QP=[13 ~25]
568     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         //QP=[26 ~38]
569     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0          //QP=[39 ~51]
570     },
571     //mv<=16
572     { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       //QP=[0 ~12]
573     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         //QP=[13 ~25]
574     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         //QP=[26 ~38]
575     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0          //QP=[39 ~51]
576     },
577     //mv<=32
578     { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,       //QP=[0 ~12]
579     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,         //QP=[13 ~25]
580     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,         //QP=[26 ~38]
581     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1          //QP=[39 ~51]
582     },
583     //mv<=64
584     { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,       //QP=[0 ~12]
585     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,         //QP=[13 ~25]
586     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,         //QP=[26 ~38]
587     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5          //QP=[39 ~51]
588     },
589     //mv<=128
590     { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,       //QP=[0 ~12]
591     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,         //QP=[13 ~25]
592     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,         //QP=[26 ~38]
593     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10          //QP=[39 ~51]
594     },
595     //mv<=256
596     { 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,       //QP=[0 ~12]
597     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,         //QP=[13 ~25]
598     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,         //QP=[26 ~38]
599     10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50          //QP=[39 ~51]
600     },
601     //mv<=512
602     { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,       //QP=[0 ~12]
603     20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,         //QP=[13 ~25]
604     20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,         //QP=[26 ~38]
605     20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100   //QP=[39 ~51]
606     },
607     //mv<=1024
608     { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,       //QP=[0 ~12]
609     20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,         //QP=[13 ~25]
610     20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,         //QP=[26 ~38]
611     20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200 //QP=[39 ~51]
612     }
613 };
614 
615 const int8_t CodechalVdencAvcStateG11::m_brcInitDistQpDeltaI8[4] =
616 {
617     0, 0, 0, 0
618 };
619 
620 const int8_t CodechalVdencAvcStateG11::m_brcInitDistQpDeltaI8LowDelay[4] =
621 {
622     -5, -2, 2, 5
623 };
624 
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)625 MOS_STATUS CodechalVdencAvcStateG11::GetKernelHeaderAndSize(
626     void                         *binary,
627     EncOperation                 operation,
628     uint32_t                     krnStateIdx,
629     void                         *krnHeader,
630     uint32_t                     *krnSize)
631 {
632     CODECHAL_ENCODE_FUNCTION_ENTER;
633 
634     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
635 
636     CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
637     CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
638     CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
639 
640     auto kernelHeaderTable = (KernelHeader *)binary;
641     PCODECHAL_KERNEL_HEADER invalidEntry = &(kernelHeaderTable->m_weightedPrediction) + 1;
642     PCODECHAL_KERNEL_HEADER nextKrnHeader = nullptr;
643     PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
644 
645     if (operation == ENC_BRC)
646     {
647         currKrnHeader = &kernelHeaderTable->m_initFrameBrc;
648     }
649     else if (operation == ENC_MBENC)
650     {
651         currKrnHeader = &kernelHeaderTable->m_mbEncQltyI;
652     }
653     else if (operation == ENC_MBENC_ADV)
654     {
655         currKrnHeader = &kernelHeaderTable->m_mbEncAdvI;
656     }
657     else if (operation == ENC_WP)
658     {
659         currKrnHeader = &kernelHeaderTable->m_weightedPrediction;
660     }
661     else
662     {
663         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
664         return MOS_STATUS_INVALID_PARAMETER;
665     }
666 
667     currKrnHeader += krnStateIdx;
668     *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
669 
670     nextKrnHeader = (currKrnHeader + 1);
671     uint32_t nextKrnOffset = *krnSize;
672     if (nextKrnHeader < invalidEntry)
673     {
674         nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
675     }
676     *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
677 
678     return eStatus;
679 }
680 
CodechalVdencAvcStateG11(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)681 CodechalVdencAvcStateG11::CodechalVdencAvcStateG11(
682         CodechalHwInterface *   hwInterface,
683         CodechalDebugInterface *debugInterface,
684         PCODECHAL_STANDARD_INFO standardInfo)  : CodechalVdencAvcState(hwInterface, debugInterface, standardInfo),
685     m_sinlgePipeVeState(nullptr)
686 {
687     CODECHAL_ENCODE_FUNCTION_ENTER;
688 
689     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
690 
691 #if defined(ENABLE_KERNELS)
692     m_kernelBase = (uint8_t*)IGCODECKRN_G11;
693 #endif
694     m_cmKernelEnable = true;
695     m_mbStatsSupported = true; //Starting from GEN9
696 
697     pfnGetKernelHeaderAndSize    = CodechalVdencAvcStateG11::GetKernelHeaderAndSize;
698 
699     m_vdencBrcInitDmemBufferSize   = sizeof(BrcInitDmem);
700     m_vdencBrcUpdateDmemBufferSize = sizeof(BrcUpdateDmem);
701     m_vdencBrcNumOfSliceOffset = CODECHAL_OFFSETOF(BrcUpdateDmem, NumOfSlice);
702 
703     // Virtual Engine is enabled in default.
704     Mos_SetVirtualEngineSupported(m_osInterface, true);
705 
706     m_vdboxOneDefaultUsed = true;
707     m_nonNativeBrcRoiSupported = true;
708     m_brcAdaptiveRegionBoostSupported = true;
709 
710     m_hmeSupported   = true;
711     m_16xMeSupported = true;
712     m_32xMeSupported = true;
713 
714     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
715 
716     CODECHAL_DEBUG_TOOL(
717         CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG11, this));
718         CreateAvcPar();
719     )
720 }
721 
~CodechalVdencAvcStateG11()722 CodechalVdencAvcStateG11::~CodechalVdencAvcStateG11()
723 {
724     CODECHAL_ENCODE_FUNCTION_ENTER;
725 
726     if (m_sinlgePipeVeState)
727     {
728         MOS_FreeMemAndSetNull(m_sinlgePipeVeState);
729     }
730 
731     CODECHAL_DEBUG_TOOL(
732         DestroyAvcPar();
733         MOS_Delete(m_encodeParState);
734     )
735 }
736 
InitializeState()737 MOS_STATUS CodechalVdencAvcStateG11::InitializeState()
738 {
739     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
740 
741     CODECHAL_ENCODE_FUNCTION_ENTER;
742 
743     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencAvcState::InitializeState());
744 
745     m_sliceSizeStreamoutSupported = true;
746     m_useHwScoreboard = false;
747     m_useCommonKernel = true;
748 
749     if (MOS_VE_SUPPORTED(m_osInterface))
750     {
751         m_sinlgePipeVeState = (PCODECHAL_ENCODE_SINGLEPIPE_VIRTUALENGINE_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SINGLEPIPE_VIRTUALENGINE_STATE));
752         CODECHAL_ENCODE_CHK_NULL_RETURN(m_sinlgePipeVeState);
753         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_InitInterface(m_hwInterface, m_sinlgePipeVeState));
754     }
755 
756     return eStatus;
757 }
758 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)759 MOS_STATUS CodechalVdencAvcStateG11::SetAndPopulateVEHintParams(
760     PMOS_COMMAND_BUFFER  cmdBuffer)
761 {
762     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
763 
764     CODECHAL_ENCODE_FUNCTION_ENTER;
765 
766     if (!MOS_VE_SUPPORTED(m_osInterface))
767     {
768         return eStatus;
769     }
770 
771     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
772     {
773         MOS_VIRTUALENGINE_SET_PARAMS  vesetParams;
774         MOS_ZeroMemory(&vesetParams, sizeof(vesetParams));
775         vesetParams.bNeedSyncWithPrevious = true;
776         vesetParams.bSFCInUse = false;
777         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_SetHintParams(m_sinlgePipeVeState, &vesetParams));
778     }
779     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_PopulateHintParams(m_sinlgePipeVeState, cmdBuffer, true));
780 
781     return eStatus;
782 }
783 
784 
SetGpuCtxCreatOption()785 MOS_STATUS CodechalVdencAvcStateG11::SetGpuCtxCreatOption()
786 {
787     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
788 
789     CODECHAL_ENCODE_FUNCTION_ENTER;
790 
791     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
792     {
793         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
794     }
795     else
796     {
797         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
798         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
799 
800         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeSinglePipeVE_ConstructParmsForGpuCtxCreation(
801             m_sinlgePipeVeState,
802             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
803     }
804 
805     return eStatus;
806 }
807 
UserFeatureKeyReport()808 MOS_STATUS CodechalVdencAvcStateG11::UserFeatureKeyReport()
809 {
810     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
811 
812     CODECHAL_ENCODE_FUNCTION_ENTER;
813 
814     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencAvcState::UserFeatureKeyReport());
815 
816 #if (_DEBUG || _RELEASE_INTERNAL)
817 
818     // VE2.0 Reporting
819     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
820 
821 #endif // _DEBUG || _RELEASE_INTERNAL
822     return eStatus;
823 }
824 
//!
//! \brief    Programs the slice-level PAK commands for the current pass and,
//!           when this is the last task of the phase, submits the command buffer.
//! \details  Steps visible in this function, in order: optional (re)allocation
//!           and locking of the PAK-slice batch buffer, content-protection
//!           state, per-slice header packing and SendSlice(), VDEnc walker
//!           state / VD pipeline flush, optional end-of-sequence/stream
//!           insertion, on-demand resource syncs, status reporting, command
//!           buffer submission, semaphore signalling and debug dumps.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success; otherwise one of the
//!           CODECHAL_ENCODE_CHK_* checks returns early with an error status.
//!
MOS_STATUS CodechalVdencAvcStateG11::ExecuteSliceLevel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);

    // NOTE(review): m_avcSliceParams and the picture/sequence parameter entries
    // selected below are dereferenced without a null check - confirm the caller
    // guarantees they are valid.
    auto cpInterface = m_hwInterface->GetCpInterface();
    auto avcSlcParams = m_avcSliceParams;
    auto avcPicParams = m_avcPicParams[avcSlcParams->pic_parameter_set_id];
    auto avcSeqParams = m_avcSeqParams[avcPicParams->seq_parameter_set_id];
    auto slcData = m_slcData;

    // *** Temporarily commented until ULT fully support multislice ROW mode

    // For use with the single task phase implementation
    //if (m_sliceStructCaps != CODECHAL_SLICE_STRUCT_ARBITRARYMBSLICE)
    //{
    //    uint32_t numSlc = (m_frameFieldHeightInMb + m_sliceHeight - 1) / m_sliceHeight;

    //    if (numSlc != m_numSlices)
    //    {
    //        return MOS_STATUS_INVALID_PARAMETER;
    //    }
    //}

    // When single task phase is supported (also in PAK), slice commands go into
    // a batch buffer that is sized on the first pass and locked on every pass.
    bool useBatchBufferForPakSlices = false;
    if (m_singleTaskPhaseSupported  && m_singleTaskPhaseSupportedInPak)
    {
        if (m_currPass == 0)
        {
            // The same buffer is used for all slices for all passes.
            uint32_t batchBufferForPakSlicesSize =
                (m_numPasses + 1) * m_numSlices * m_pakSliceSize;
            if (batchBufferForPakSlicesSize >
                (uint32_t)m_batchBufferForPakSlices[m_currRecycledBufIdx].iSize)
            {
                // Existing buffer is too small: release it (if any) and allocate anew.
                if (m_batchBufferForPakSlices[m_currRecycledBufIdx].iSize)
                {
                    CODECHAL_ENCODE_CHK_STATUS_RETURN(ReleaseBatchBufferForPakSlices(m_currRecycledBufIdx));
                }

                CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBatchBufferForPakSlices(
                    m_numSlices,
                    m_numPasses,
                    m_currRecycledBufIdx));
            }
        }
        CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_LockBb(
            m_osInterface,
            &m_batchBufferForPakSlices[m_currRecycledBufIdx]));
        useBatchBufferForPakSlices = true;
    }

    // NOTE(review): every early return from here on leaves without calling
    // pfnReturnCommandBuffer (and without Mhw_UnlockBb when the batch buffer was
    // locked above) - confirm this is acceptable for the error paths.
    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

    // Content protection: program the MFX protection state for this pass.
    if (m_osInterface->osCpInterface->IsCpEnabled())
    {
        // NOTE(review): sliceInfoParam is not zero-initialised here; only
        // bLastPass is assigned before it is passed on.
        MHW_CP_SLICE_INFO_PARAMS sliceInfoParam;
        sliceInfoParam.bLastPass = (m_currPass == m_numPasses) ? true : false;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->SetMfxProtectionState(false, &cmdBuffer, nullptr, &sliceInfoParam));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->UpdateParams(false));
    }

    // NOTE(review): avcSlcParams was already initialised from m_avcSliceParams
    // at the top of the function; this reassignment looks redundant.
    avcSlcParams = m_avcSliceParams;

    // Parameters for slice header packing (pAvcSliceParams is filled per slice
    // inside the loop). NOTE(review): the struct is not zeroed before use.
    CODECHAL_ENCODE_AVC_PACK_SLC_HEADER_PARAMS packSlcHeaderParams;
    packSlcHeaderParams.pBsBuffer = &m_bsBuffer;
    packSlcHeaderParams.pPicParams = avcPicParams;
    packSlcHeaderParams.pSeqParams = m_avcSeqParam;
    packSlcHeaderParams.ppRefList = &(m_refList[0]);
    packSlcHeaderParams.CurrPic = m_currOriginalPic;
    packSlcHeaderParams.CurrReconPic = m_currReconstructedPic;
    packSlcHeaderParams.UserFlags = m_userFlags;
    packSlcHeaderParams.NalUnitType = m_nalUnitType;
    packSlcHeaderParams.wPictureCodingType = m_pictureCodingType;
    packSlcHeaderParams.bVdencEnabled = true;

    // Slice state fields that are identical for every slice of the frame.
    MHW_VDBOX_AVC_SLICE_STATE sliceState;
    MOS_ZeroMemory(&sliceState, sizeof(sliceState));
    sliceState.presDataBuffer = &m_resMbCodeSurface;
    sliceState.pAvcPicIdx = &(m_picIdx[0]);
    sliceState.pEncodeAvcSeqParams = m_avcSeqParam;
    sliceState.pEncodeAvcPicParams = avcPicParams;
    sliceState.pBsBuffer = &m_bsBuffer;
    sliceState.ppNalUnitParams = m_nalUnitParams;
    sliceState.bBrcEnabled = false;
    // Disable Panic mode when min/max QP control is on. kernel may disable it, but disable in driver also.
    sliceState.bRCPanicEnable = m_panicEnable && (!m_minMaxQpControlEnabled);
    sliceState.bAcceleratorHeaderPackingCaps = m_encodeParams.bAcceleratorHeaderPackingCaps;
    sliceState.wFrameFieldHeightInMB = m_frameFieldHeightInMb;

    MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
    for (uint16_t slcCount = 0; slcCount < m_numSlices; slcCount++)
    {
        // Slice headers are packed and command offsets computed on the first
        // pass only; later passes reuse the values stored in slcData.
        if (m_currPass == 0)
        {
            packSlcHeaderParams.pAvcSliceParams = &avcSlcParams[slcCount];
            if (m_acceleratorHeaderPackingCaps)
            {
                slcData[slcCount].SliceOffset = m_bsBuffer.SliceOffset;
                CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalAvcEncode_PackSliceHeader(&packSlcHeaderParams));
                slcData[slcCount].BitSize = m_bsBuffer.BitSize;
            }
            if (m_sliceStructCaps != CODECHAL_SLICE_STRUCT_ARBITRARYMBSLICE)
            {
                slcData[slcCount].CmdOffset = slcCount * m_sliceHeight * m_picWidthInMb * 16 * 4;
            }
            else
            {
                slcData[slcCount].CmdOffset = packSlcHeaderParams.pAvcSliceParams->first_mb_in_slice * 16 * 4;
            }
        }

        // Per-slice state.
        sliceState.pEncodeAvcSliceParams = &avcSlcParams[slcCount];
        sliceState.dwDataBufferOffset =
            m_slcData[slcCount].CmdOffset + m_mbcodeBottomFieldOffset;
        sliceState.dwOffset = slcData[slcCount].SliceOffset;
        sliceState.dwLength = slcData[slcCount].BitSize;
        sliceState.uiSkipEmulationCheckCount = slcData[slcCount].SkipEmulationByteCount;
        sliceState.dwSliceIndex = (uint32_t)slcCount;
        sliceState.bFirstPass = (m_currPass == 0);
        sliceState.bLastPass = (m_currPass == m_numPasses);
        sliceState.bInsertBeforeSliceHeaders = (slcCount == 0);
        sliceState.bVdencInUse = true;
        // App handles tail insertion for VDEnc dynamic slice in non-cp case
        sliceState.bVdencNoTailInsertion = m_vdencNoTailInsertion;

        uint32_t batchBufferForPakSlicesStartOffset =
            (uint32_t)m_batchBufferForPakSlices[m_currRecycledBufIdx].iCurrent;

        if (useBatchBufferForPakSlices)
        {
            sliceState.pBatchBufferForPakSlices =
                &m_batchBufferForPakSlices[m_currRecycledBufIdx];
            sliceState.bSingleTaskPhaseSupported = true;
            sliceState.dwBatchBufferForPakSlicesStartOffset = batchBufferForPakSlicesStartOffset;
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRounding(m_avcRoundingParams, &sliceState));

        sliceState.oneOnOneMapping = m_oneOnOneMapping;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSlice(&cmdBuffer, &sliceState));

        // Report slice size
        if (m_presMetadataBuffer != nullptr)
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(ReportSliceSizeMetaData(m_presMetadataBuffer, &cmdBuffer, slcCount));
        }

        // Add dumps for 2nd level batch buffer
        // NOTE(review): bVdencInUse is set to true above for every slice, so this
        // branch is only reachable if SetRounding()/SendSlice() clear the flag.
        if (sliceState.bSingleTaskPhaseSupported && !sliceState.bVdencInUse)
        {
            CODECHAL_ENCODE_CHK_NULL_RETURN(sliceState.pBatchBufferForPakSlices);

            CODECHAL_DEBUG_TOOL(
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->Dump2ndLvlBatch(
                    sliceState.pBatchBufferForPakSlices,
                    CODECHAL_MEDIA_STATE_ENC_NORMAL,
                    nullptr));
            )
        }

        // For SKL, only the 1st slice state should be programmed for VDENC
        if (!m_hwInterface->m_isVdencSuperSliceEnabled)
        {
            break;
        }
        else // For CNL slice state is programmed per Super slice
        {
            MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
            // MfxPipeDone should be set for all super slices except the last super slice and should not be set for tail insertion.
            vdPipelineFlushParams.Flags.bWaitDoneMFX =
                (slcCount == (m_numSlices)-1) ? ((m_lastPicInStream || m_lastPicInSeq) ? 0 : 1) : 1;
            vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
            vdPipelineFlushParams.Flags.bFlushVDENC = 1;
            vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));

            //Do not send MI_FLUSH for last Super slice now
            if (slcCount != ((m_numSlices)-1))
            {
                // Send MI_FLUSH for every Super slice
                MHW_MI_FLUSH_DW_PARAMS flushDwParams;
                MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
                    &cmdBuffer,
                    &flushDwParams));
            }
        }
    }

    if (useBatchBufferForPakSlices)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
            m_osInterface,
            &m_batchBufferForPakSlices[m_currRecycledBufIdx],
            m_lastTaskInPhase));
    }

    //Send VDENC WALKER cmd per every frame for SKL
    if (!m_hwInterface->m_isVdencSuperSliceEnabled)
    {
        PMHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams = CreateMhwVdboxVdencWalkerStateParams();
        CODECHAL_ENCODE_CHK_NULL_RETURN(vdencWalkerStateParams);
        vdencWalkerStateParams->Mode = CODECHAL_ENCODE_MODE_AVC;
        vdencWalkerStateParams->pAvcSeqParams = avcSeqParams;
        vdencWalkerStateParams->pAvcSlcParams = avcSlcParams;
        // The status is checked only after the parameter object has been deleted,
        // so the object is freed on the failure path as well.
        eStatus = m_vdencInterface->AddVdencWalkerStateCmd(&cmdBuffer, vdencWalkerStateParams);
        MOS_Delete(vdencWalkerStateParams);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);

        MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
        // MFXPipeDone should not be set for tail insertion
        vdPipelineFlushParams.Flags.bWaitDoneMFX =
            (m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
        vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
        vdPipelineFlushParams.Flags.bFlushVDENC = 1;
        vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
    }

    // Insert end of sequence/stream if set
    if (m_lastPicInStream || m_lastPicInSeq)
    {
        MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
        MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
        pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
        pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
        pakInsertObjectParams.dwBitSize = 32;   // use dwBitSize for SrcDataEndingBitInclusion
        if (m_lastPicInSeq)
        {
            pakInsertObjectParams.dwLastPicInSeqData = (uint32_t)((1 << 16) | CODECHAL_ENCODE_AVC_NAL_UT_EOSEQ << 24);
        }
        if (m_lastPicInStream)
        {
            pakInsertObjectParams.dwLastPicInStreamData = (uint32_t)((1 << 16) | CODECHAL_ENCODE_AVC_NAL_UT_EOSTREAM << 24);
        }
        pakInsertObjectParams.bHeaderLengthExcludeFrmSize = true;
        // The struct was zeroed above and bEmulationByteBitsInsert is not set in
        // this block, so this check is not expected to fire here.
        if (pakInsertObjectParams.bEmulationByteBitsInsert)
        {
            //Does not matter here, but keeping for consistency
            CODECHAL_ENCODE_ASSERTMESSAGE("The emulation prevention bytes are not inserted by the app and are requested to be inserted by HW.");
        }
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mfxInterface->AddMfxPakInsertObject(&cmdBuffer, nullptr, &pakInsertObjectParams));
    }

    if (m_hwInterface->m_isVdencSuperSliceEnabled)
    {
        // Send MI_FLUSH with bVideoPipelineCacheInvalidate set to true for last Super slice
        MHW_MI_FLUSH_DW_PARAMS flushDwParams;
        MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
        flushDwParams.bVideoPipelineCacheInvalidate = true;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
            &cmdBuffer,
            &flushDwParams));
    }

#if defined(ENABLE_KERNELS)
    // On-demand sync for VDEnc StreamIn surface and CSC surface
    if (m_currPass == 0)
    {
        // NOTE(review): m_cscDsState is dereferenced without a null check.
        if (m_cscDsState->RequireCsc())
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->WaitCscSurface(m_videoContext, true));
        }

        if (m_16xMeSupported)
        {
            auto syncParams = g_cInitSyncParams;
            syncParams.GpuContext = m_videoContext;
            syncParams.bReadOnly = true;
            syncParams.presSyncResource = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
            m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
        }
    }
#endif

    // Prepare MetaData
    if (m_presMetadataBuffer != nullptr)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(m_presMetadataBuffer, &m_pakSliceSizeStreamoutBuffer, &cmdBuffer));
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadMfcStatus(&cmdBuffer));

    if (m_vdencBrcEnabled)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreNumPasses(
            &(m_encodeStatusBuf),
            m_miInterface,
            &cmdBuffer,
            m_currPass));
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));

    // The batch buffer end is added only when the command buffer is about to be
    // submitted (same condition as the submission block further down).
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
    }

    // Dump name of the form "PAK_PASS<n>"; built unconditionally, consumed only
    // inside the debug-tool block below.
    std::string pak_pass = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass));
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
            &cmdBuffer,
            CODECHAL_NUM_MEDIA_STATES,
            pak_pass.data()));

    //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgReplaceAllCommands(
    //    m_debugInterface,
    //    &cmdBuffer));
    )

        m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    bool renderingFlags = m_videoContextUsesNullHw;

    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        // Restore TLB allocation
        // NOTE(review): cmdBuffer has already been handed back through
        // pfnReturnCommandBuffer and had its batch buffer end added above when
        // RestoreTLBAllocation() is called with it - confirm this ordering is
        // intended.
        if (MEDIA_IS_WA(m_waTable, WaTlbAllocationForAvcVdenc))
        {
            CODECHAL_ENCODE_CHK_STATUS_RETURN(RestoreTLBAllocation(&cmdBuffer, &m_vdencTlbMmioBuffer));
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&cmdBuffer));

        HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, renderingFlags));

        CODECHAL_DEBUG_TOOL(
            if (m_mmcState)
            {
                m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
            }
        )

            if (m_sliceSizeStreamoutSupported)
            {
                CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                    &m_pakSliceSizeStreamoutBuffer,
                    CodechalDbgAttr::attrOutput,
                    "SliceSizeStreamout",
                    CODECHAL_ENCODE_SLICESIZE_BUF_SIZE,
                    0,
                    CODECHAL_NUM_MEDIA_STATES)));
            }

        // After the last pass, signal the video-context sync object (if one is
        // in use), first waiting on it when the signalled-object count has
        // reached its limit.
        if ((m_currPass == m_numPasses) &&
            m_signalEnc &&
            !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
        {
            // Check if the signal obj count exceeds max value
            if (m_semaphoreObjCount == MOS_MIN(m_semaphoreMaxCount, MOS_MAX_OBJECT_SIGNALED))
            {
                auto syncParams = g_cInitSyncParams;
                syncParams.GpuContext = m_renderContext;
                syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;

                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
                m_semaphoreObjCount--;
            }

            // signal semaphore
            auto syncParams = g_cInitSyncParams;
            syncParams.GpuContext = m_videoContext;
            syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
            m_semaphoreObjCount++;
        }
    }

    CODECHAL_DEBUG_TOOL(
        // here add the dump buffer for PAK statistics.
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_pakStatsBufferFull[m_currRecycledBufIdx],
            CodechalDbgAttr::attrPakOutput,
            "MB and FrameLevel PAK staistics vdenc",
            m_vdencBrcPakStatsBufferSize + m_picWidthInMb * m_picHeightInMb * 64,   //size
            0, //offset
            CODECHAL_MEDIA_STATE_16X_ME));
    )

    if (m_vdencBrcEnabled)
    {
        CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
        CODECHAL_DEBUG_TOOL(DumpEncodeImgStats(nullptr));
    }

    // Reset parameters for next PAK execution
    if (m_currPass == m_numPasses)
    {
        if (!m_singleTaskPhaseSupported)
        {
            m_osInterface->pfnResetPerfBufferID(m_osInterface);
        }

        m_newPpsHeader = 0;
        m_newSeqHeader = 0;
    }

    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateSliceStateParam(
            m_adaptiveRoundingInterEnable,
            &sliceState));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpFrameParFile());)

        return eStatus;
}
1243 
InitKernelStateSFD()1244 MOS_STATUS CodechalVdencAvcStateG11::InitKernelStateSFD()
1245 {
1246     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1247 
1248     CODECHAL_ENCODE_FUNCTION_ENTER;
1249 
1250     auto renderEngineInterface = m_hwInterface->GetRenderInterface();
1251     auto stateHeapInterface = m_renderEngineInterface->m_stateHeapInterface;
1252     CODECHAL_ENCODE_CHK_NULL_RETURN(stateHeapInterface);
1253 
1254     uint8_t* kernelBinary;
1255     uint32_t kernelSize;
1256 
1257     MOS_STATUS status = CodecHalGetKernelBinaryAndSize(m_kernelBase, m_kuidCommon, &kernelBinary, &kernelSize);
1258     CODECHAL_ENCODE_CHK_STATUS_RETURN(status);
1259 
1260     CODECHAL_KERNEL_HEADER currKrnHeader;
1261     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG11(
1262                 kernelBinary,
1263                 ENC_SFD,
1264                 0,
1265                 (void*)&currKrnHeader,
1266                 &kernelSize));
1267 
1268     auto kernelStatePtr                            = m_sfdKernelState;
1269     kernelStatePtr->KernelParams.iBTCount = sfdNumSurfaces;
1270     kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
1271     kernelStatePtr->KernelParams.iCurbeLength = sizeof(SfdCurbe);
1272     kernelStatePtr->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
1273     kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
1274     kernelStatePtr->KernelParams.iIdCount = 1;
1275     kernelStatePtr->KernelParams.iInlineDataLength = 0;
1276 
1277     kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
1278     kernelStatePtr->KernelParams.pBinary = kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1279     kernelStatePtr->KernelParams.iSize = kernelSize;
1280 
1281     CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
1282                 stateHeapInterface,
1283                 kernelStatePtr->KernelParams.iBTCount,
1284                 &kernelStatePtr->dwSshSize,
1285                 &kernelStatePtr->dwBindingTableSize));
1286 
1287     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr));
1288 
1289     return eStatus;
1290 }
1291 
CheckSupportedFormat(PMOS_SURFACE surface)1292 bool CodechalVdencAvcStateG11::CheckSupportedFormat(PMOS_SURFACE surface)
1293 {
1294     CODECHAL_ENCODE_FUNCTION_ENTER;
1295 
1296     bool colorFormatSupported = true;
1297     if (IS_Y_MAJOR_TILE_FORMAT(surface->TileType))
1298     {
1299         switch (surface->Format)
1300         {
1301         case Format_NV12:
1302             break;
1303         default:
1304             colorFormatSupported = false;
1305             break;
1306         }
1307     }
1308     else if (surface->TileType == MOS_TILE_LINEAR)
1309     {
1310         switch (surface->Format)
1311         {
1312         case Format_NV12:
1313         case Format_YUY2:
1314         case Format_YUYV:
1315         case Format_YVYU:
1316         case Format_UYVY:
1317         case Format_VYUY:
1318         case Format_AYUV:
1319         case Format_A8R8G8B8:
1320         case Format_A8B8G8R8:
1321             break;
1322         default:
1323             colorFormatSupported = false;
1324             break;
1325         }
1326     }
1327     else
1328     {
1329         colorFormatSupported = false;
1330     }
1331 
1332     return colorFormatSupported;
1333 }
1334 
GetTrellisQuantization(PCODECHAL_ENCODE_AVC_TQ_INPUT_PARAMS params,PCODECHAL_ENCODE_AVC_TQ_PARAMS trellisQuantParams)1335 MOS_STATUS CodechalVdencAvcStateG11::GetTrellisQuantization(PCODECHAL_ENCODE_AVC_TQ_INPUT_PARAMS params, PCODECHAL_ENCODE_AVC_TQ_PARAMS trellisQuantParams)
1336 {
1337     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1338 
1339     CODECHAL_ENCODE_FUNCTION_ENTER;
1340 
1341     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1342     CODECHAL_ENCODE_CHK_NULL_RETURN(trellisQuantParams);
1343 
1344     trellisQuantParams->dwTqEnabled = TrellisQuantizationEnable[params->ucTargetUsage];
1345     trellisQuantParams->dwTqRounding = trellisQuantParams->dwTqEnabled ? TrellisQuantizationRounding[params->ucTargetUsage] : 0;
1346 
1347     return eStatus;
1348 }
1349 
AddHucOutputRegistersHandling(MmioRegistersHuc * mmioRegisters,PMOS_COMMAND_BUFFER cmdBuffer,bool addToEncodeStatus)1350 MOS_STATUS CodechalVdencAvcStateG11::AddHucOutputRegistersHandling(
1351     MmioRegistersHuc*   mmioRegisters,
1352     PMOS_COMMAND_BUFFER cmdBuffer,
1353     bool                addToEncodeStatus)
1354 {
1355     CODECHAL_ENCODE_FUNCTION_ENTER;
1356 
1357     CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
1358     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1359 
1360     return StoreHucErrorStatus(mmioRegisters, cmdBuffer, addToEncodeStatus);
1361 }
1362 
SetDmemHuCBrcInitReset()1363 MOS_STATUS CodechalVdencAvcStateG11::SetDmemHuCBrcInitReset()
1364 {
1365     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1366 
1367     CODECHAL_ENCODE_FUNCTION_ENTER;
1368 
1369     // Setup BRC DMEM
1370     MOS_LOCK_PARAMS lockFlagsWriteOnly;
1371     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1372     lockFlagsWriteOnly.WriteOnly = 1;
1373     auto dmem                    = (BrcInitDmem *)m_osInterface->pfnLockResource(
1374         m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
1375 
1376     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
1377     MOS_ZeroMemory(dmem, sizeof(BrcInitDmem));
1378 
1379     SetDmemHuCBrcInitResetImpl<BrcInitDmem>(dmem);
1380 
1381     // fractional QP enable for extended rho domain
1382     dmem->INIT_FracQPEnable_U8 = m_lookaheadDepth > 0 ? 0 : (uint8_t)m_vdencInterface->IsRhoDomainStatsEnabled();
1383 
1384     dmem->INIT_SinglePassOnly = m_vdencSinglePassEnable;
1385 
1386     if (m_avcSeqParam->ScenarioInfo == ESCENARIO_GAMESTREAMING)
1387     {
1388         if (m_avcSeqParam->RateControlMethod == RATECONTROL_VBR)
1389         {
1390             m_avcSeqParam->MaxBitRate = m_avcSeqParam->TargetBitRate;
1391         }
1392 
1393         // Disable delta QP adaption for non-VCM/ICQ/LowDelay until we have better algorithm
1394         if ((m_avcSeqParam->RateControlMethod != RATECONTROL_VCM) &&
1395             (m_avcSeqParam->RateControlMethod != RATECONTROL_ICQ) &&
1396             (m_avcSeqParam->FrameSizeTolerance != EFRAMESIZETOL_EXTREMELY_LOW))
1397         {
1398             dmem->INIT_DeltaQP_Adaptation_U8 = 0;
1399         }
1400 
1401         dmem->INIT_New_DeltaQP_Adaptation_U8 = 1;
1402     }
1403 
1404     if (((m_avcSeqParam->TargetUsage & 0x07) == TARGETUSAGE_BEST_SPEED) &&
1405         (m_avcSeqParam->FrameWidth >= m_singlePassMinFrameWidth) &&
1406         (m_avcSeqParam->FrameHeight >= m_singlePassMinFrameHeight) &&
1407         (m_avcSeqParam->FramesPer100Sec >= m_singlePassMinFramePer100s))
1408     {
1409         dmem->INIT_SinglePassOnly = true;
1410     }
1411 
1412     dmem->INIT_LookaheadDepth_U8 = m_lookaheadDepth;
1413 
1414     //Override the DistQPDelta.
1415     if (m_mbBrcEnabled)
1416     {
1417         if (m_avcSeqParam->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
1418         {
1419             MOS_SecureMemcpy(dmem->INIT_DistQPDelta_I8, 4 * sizeof(int8_t), (void*)m_brcInitDistQpDeltaI8LowDelay, 4 * sizeof(int8_t));
1420         }
1421         else
1422         {
1423             MOS_SecureMemcpy(dmem->INIT_DistQPDelta_I8, 4 * sizeof(int8_t), (void*)m_brcInitDistQpDeltaI8, 4 * sizeof(int8_t));
1424         }
1425     }
1426 
1427     CODECHAL_DEBUG_TOOL(
1428         CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateBrcInitParam(
1429             dmem));
1430     )
1431 
1432     m_osInterface->pfnUnlockResource(m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
1433 
1434     return eStatus;
1435 }
1436 
SetDmemHuCBrcUpdate()1437 MOS_STATUS CodechalVdencAvcStateG11::SetDmemHuCBrcUpdate()
1438 {
1439     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1440 
1441     CODECHAL_ENCODE_FUNCTION_ENTER;
1442 
1443     // Program update DMEM
1444     MOS_LOCK_PARAMS lockFlags;
1445     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1446     lockFlags.WriteOnly = 1;
1447     auto dmem           = (BrcUpdateDmem *)m_osInterface->pfnLockResource(
1448         m_osInterface, &m_resVdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_currPass], &lockFlags);
1449     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
1450     SetDmemHuCBrcUpdateImpl<BrcUpdateDmem>(dmem);
1451 
1452     MOS_LOCK_PARAMS lockFlagsReadOnly;
1453     MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
1454     lockFlagsReadOnly.ReadOnly = 1;
1455     auto initDmem              = (BrcInitDmem *)m_osInterface->pfnLockResource(
1456         m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsReadOnly);
1457     CODECHAL_ENCODE_CHK_NULL_RETURN(initDmem);
1458 
1459     if (initDmem->INIT_AdaptiveHMEExtensionEnable_U8)
1460     {
1461         dmem->HME0XOffset_I8 = 32;
1462         dmem->HME0YOffset_I8 = 24;
1463         dmem->HME1XOffset_I8 = -32;
1464         dmem->HME1YOffset_I8 = -24;
1465     }
1466 
1467     m_osInterface->pfnUnlockResource(m_osInterface, &m_resVdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
1468 
1469     if (m_16xMeSupported && (m_pictureCodingType == P_TYPE))
1470     {
1471         dmem->HmeDistAvailable_U8 = 1;
1472     }
1473     dmem->UPD_WidthInMB_U16 = m_picWidthInMb;
1474     dmem->UPD_HeightInMB_U16 = m_picHeightInMb;
1475 
1476     dmem->MOTION_ADAPTIVE_G4 = (m_avcSeqParam->ScenarioInfo == ESCENARIO_GAMESTREAMING) || ((m_avcPicParam->TargetFrameSize > 0) && (m_lookaheadDepth == 0)); // GS or TCBRC
1477     dmem->UPD_CQMEnabled_U8  = m_avcSeqParam->seq_scaling_matrix_present_flag || m_avcPicParam->pic_scaling_matrix_present_flag;
1478 
1479     dmem->UPD_LA_TargetSize_U32 = m_avcPicParam->TargetFrameSize << 3;
1480 
1481     if (m_lookaheadDepth > 0)
1482     {
1483         dmem->EnableLookAhead = 1;
1484         dmem->UPD_LA_TargetFulness_U32 = m_targetBufferFulness;
1485         dmem->UPD_Delta_U8 = m_avcPicParam->QpModulationStrength;
1486     }
1487 
1488     dmem->UPD_TCBRC_SCENARIO_U8 = m_avcSeqParam->bAutoMaxPBFrameSizeForSceneChange;
1489 
1490     CODECHAL_DEBUG_TOOL(
1491         CODECHAL_ENCODE_CHK_STATUS_RETURN(PopulateBrcUpdateParam(
1492             dmem));
1493     )
1494 
1495     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resVdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_currPass]));
1496 
1497     return eStatus;
1498 }
1499 
LoadMvCost(uint8_t qp)1500 MOS_STATUS CodechalVdencAvcStateG11::LoadMvCost(uint8_t qp)
1501 {
1502     CODECHAL_ENCODE_FUNCTION_ENTER;
1503 
1504     for (uint8_t i=0; i< 8; i++)
1505     {
1506         m_vdEncMvCost[i] = Map44LutValue((uint32_t)(m_mvCostSkipBiasQPel[0][i]), 0x6f);
1507     }
1508 
1509     if (!m_vdencBrcEnabled)
1510     {
1511         if (qp == 47 || qp == 48 || qp == 49)
1512         {
1513             for (uint8_t i = 3; i < 8; i++)
1514             {
1515                 m_vdEncMvCost[i] = Map44LutValue((uint32_t)(m_mvCostSkipBiasQPel[1][i]), 0x6f);
1516             }
1517         }
1518         if (qp == 50 || qp == 51)
1519         {
1520             for (uint8_t i = 3; i < 8; i++)
1521             {
1522                 m_vdEncMvCost[i] = Map44LutValue((uint32_t)(m_mvCostSkipBiasQPel[2][i]), 0x6f);
1523             }
1524         }
1525     }
1526 
1527     return MOS_STATUS_SUCCESS;
1528 }
1529 
LoadHmeMvCost(uint8_t qp)1530 MOS_STATUS CodechalVdencAvcStateG11::LoadHmeMvCost(uint8_t qp)
1531 {
1532     CODECHAL_ENCODE_FUNCTION_ENTER;
1533 
1534     PCODEC_AVC_ENCODE_SEQUENCE_PARAMS avcSeqParams = m_avcSeqParam;
1535     const uint32_t(*vdencHmeCostTable)[CODEC_AVC_NUM_QP];
1536     if (avcSeqParams->ScenarioInfo == ESCENARIO_DISPLAYREMOTING)
1537     {
1538         vdencHmeCostTable = m_hmeCostDisplayRemote;
1539     }
1540     else
1541     {
1542         vdencHmeCostTable = m_hmeCost;
1543     }
1544 
1545     for (uint8_t i = 0; i < 8; i++)
1546     {
1547         m_vdEncHmeMvCost[i] = Map44LutValue(*(vdencHmeCostTable[i] + qp), 0x6f);
1548     }
1549 
1550     return MOS_STATUS_SUCCESS;
1551 }
1552 
LoadHmeMvCostTable(PCODEC_AVC_ENCODE_SEQUENCE_PARAMS seqParams,uint8_t hmeMVCostTable[8][42])1553 MOS_STATUS CodechalVdencAvcStateG11::LoadHmeMvCostTable(PCODEC_AVC_ENCODE_SEQUENCE_PARAMS seqParams, uint8_t hmeMVCostTable[8][42])
1554 {
1555     CODECHAL_ENCODE_FUNCTION_ENTER;
1556 
1557     const uint32_t(*vdencHmeCostTable)[CODEC_AVC_NUM_QP];
1558     if ((m_avcSeqParam->ScenarioInfo == ESCENARIO_DISPLAYREMOTING) || (m_avcSeqParam->RateControlMethod == RATECONTROL_QVBR))
1559     {
1560         vdencHmeCostTable = m_hmeCostDisplayRemote;
1561     }
1562     else
1563     {
1564         vdencHmeCostTable = m_hmeCost;
1565     }
1566 
1567     for (int i = 0; i < 8; i++)
1568     {
1569         for (int j = 0; j < 42; j++)
1570         {
1571             hmeMVCostTable[i][j] = Map44LutValue(*(vdencHmeCostTable[i] + j + 10), 0x6f);
1572         }
1573     }
1574 
1575     return MOS_STATUS_SUCCESS;
1576 }
1577 
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer)1578 MOS_STATUS CodechalVdencAvcStateG11::AddVdencWalkerStateCmd(
1579     PMOS_COMMAND_BUFFER cmdBuffer)
1580 {
1581     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1582 
1583     CODECHAL_ENCODE_FUNCTION_ENTER;
1584 
1585     MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11 vdencWalkerStateParams;
1586     auto avcSlcParams = m_avcSliceParams;
1587     auto avcPicParams = m_avcPicParams[avcSlcParams->pic_parameter_set_id];
1588     auto avcSeqParams = m_avcSeqParams[avcPicParams->seq_parameter_set_id];
1589 
1590     vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_AVC;
1591     vdencWalkerStateParams.pAvcSeqParams = avcSeqParams;
1592     vdencWalkerStateParams.pAvcSlcParams = m_avcSliceParams;
1593     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
1594 
1595     return eStatus;
1596 }
1597 
CalculateVdencCommandsSize()1598 MOS_STATUS CodechalVdencAvcStateG11::CalculateVdencCommandsSize()
1599 {
1600     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1601 
1602     CODECHAL_ENCODE_FUNCTION_ENTER;
1603 
1604     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11 stateCmdSizeParams;
1605     uint32_t vdencPictureStatesSize, vdencPicturePatchListSize;
1606     uint32_t vdencSliceStatesSize, vdencSlicePatchListSize;
1607     m_hwInterface->GetHxxStateCommandSize(
1608         CODECHAL_ENCODE_MODE_AVC,
1609         (uint32_t*)&vdencPictureStatesSize,
1610         (uint32_t*)&vdencPicturePatchListSize,
1611         &stateCmdSizeParams);
1612 
1613     m_pictureStatesSize += vdencPictureStatesSize;
1614     m_picturePatchListSize += vdencPicturePatchListSize;
1615 
1616     // Picture Level Commands
1617     m_hwInterface->GetVdencStateCommandsDataSize(
1618         CODECHAL_ENCODE_MODE_AVC,
1619         (uint32_t*)&vdencPictureStatesSize,
1620         (uint32_t*)&vdencPicturePatchListSize);
1621 
1622     m_pictureStatesSize += vdencPictureStatesSize;
1623     m_picturePatchListSize += vdencPicturePatchListSize;
1624 
1625     // Slice Level Commands
1626     m_hwInterface->GetVdencPrimitiveCommandsDataSize(
1627         CODECHAL_ENCODE_MODE_AVC,
1628         (uint32_t*)&vdencSliceStatesSize,
1629         (uint32_t*)&vdencSlicePatchListSize
1630     );
1631 
1632     m_sliceStatesSize += vdencSliceStatesSize;
1633     m_slicePatchListSize += vdencSlicePatchListSize;
1634 
1635     return eStatus;
1636 }
1637 
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTracking,MHW_MI_MMIOREGISTERS * mmioRegister)1638 MOS_STATUS CodechalVdencAvcStateG11::SendPrologWithFrameTracking(
1639     PMOS_COMMAND_BUFFER         cmdBuffer,
1640     bool                        frameTracking,
1641     MHW_MI_MMIOREGISTERS       *mmioRegister)
1642 {
1643     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
1644     {
1645         PMOS_CMD_BUF_ATTRI_VE attriExt =
1646                 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
1647         attriExt->bUseVirtualEngineHint = true;
1648         attriExt->VEngineHintParams.NeedSyncWithPrevious = 1;
1649     }
1650 
1651     return CodechalVdencAvcState::SendPrologWithFrameTracking(cmdBuffer, frameTracking, mmioRegister);
1652 }
1653 
CreateMhwVdboxStateCmdsizeParams()1654 PMHW_VDBOX_STATE_CMDSIZE_PARAMS CodechalVdencAvcStateG11::CreateMhwVdboxStateCmdsizeParams()
1655 {
1656     PMHW_VDBOX_STATE_CMDSIZE_PARAMS cmdSizeParams = MOS_New(MHW_VDBOX_STATE_CMDSIZE_PARAMS_G11);
1657 
1658     return cmdSizeParams;
1659 }
1660 
CreateMhwVdboxVdencWalkerStateParams()1661 PMHW_VDBOX_VDENC_WALKER_STATE_PARAMS CodechalVdencAvcStateG11::CreateMhwVdboxVdencWalkerStateParams()
1662 {
1663     PMHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams = MOS_New(MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G11);
1664 
1665     return vdencWalkerStateParams;
1666 }
1667 
InitKernelStateMe()1668 MOS_STATUS CodechalVdencAvcStateG11::InitKernelStateMe()
1669 {
1670     m_hmeKernel = MOS_New(CodechalKernelHmeG11, this);
1671     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel);
1672     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize(
1673         GetCommonKernelHeaderAndSizeG11,
1674         m_kernelBase,
1675         m_kuidCommon));
1676     return MOS_STATUS_SUCCESS;
1677 }
1678 
ExecuteMeKernel()1679 MOS_STATUS CodechalVdencAvcStateG11::ExecuteMeKernel()
1680 {
1681     if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled())
1682     {
1683         CodechalKernelHme::CurbeParam curbeParam = {};
1684         curbeParam.subPelMode = 3;
1685         curbeParam.currOriginalPic = m_avcPicParam->CurrOriginalPic;
1686         curbeParam.qpPrimeY = m_avcPicParam->pic_init_qp_minus26 + 26 + m_avcSliceParams->slice_qp_delta;
1687         curbeParam.targetUsage = m_avcSeqParam->TargetUsage;
1688         curbeParam.maxMvLen = CodecHalAvcEncode_GetMaxMvLen(m_avcSeqParam->Level);
1689         curbeParam.numRefIdxL0Minus1 = m_avcSliceParams->num_ref_idx_l0_active_minus1;
1690         curbeParam.numRefIdxL1Minus1 = m_avcSliceParams->num_ref_idx_l1_active_minus1;
1691 
1692         auto slcParams = m_avcSliceParams;
1693         curbeParam.list0RefID0FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_0);
1694         curbeParam.list0RefID1FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_1);
1695         curbeParam.list0RefID2FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_2);
1696         curbeParam.list0RefID3FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_3);
1697         curbeParam.list0RefID4FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_4);
1698         curbeParam.list0RefID5FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_5);
1699         curbeParam.list0RefID6FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_6);
1700         curbeParam.list0RefID7FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_0, CODECHAL_ENCODE_REF_ID_7);
1701         curbeParam.list1RefID0FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_1, CODECHAL_ENCODE_REF_ID_0);
1702         curbeParam.list1RefID1FieldParity = CodecHalAvcEncode_GetFieldParity(slcParams, LIST_1, CODECHAL_ENCODE_REF_ID_1);
1703 
1704         CodechalKernelHme::SurfaceParams surfaceParam = {};
1705         surfaceParam.mbaffEnabled = m_mbaffEnabled;
1706         surfaceParam.numRefIdxL0ActiveMinus1 = m_avcSliceParams->num_ref_idx_l0_active_minus1;
1707         surfaceParam.numRefIdxL1ActiveMinus1 = m_avcSliceParams->num_ref_idx_l1_active_minus1;
1708         surfaceParam.verticalLineStride = m_verticalLineStride;
1709         surfaceParam.verticalLineStrideOffset = m_verticalLineStrideOffset;
1710         surfaceParam.refList = &m_refList[0];
1711         surfaceParam.picIdx = &m_picIdx[0];
1712         surfaceParam.currOriginalPic = &m_currOriginalPic;
1713         surfaceParam.refL0List = &(m_avcSliceParams->RefPicList[LIST_0][0]);
1714         surfaceParam.refL1List = &(m_avcSliceParams->RefPicList[LIST_1][0]);
1715         surfaceParam.vdencStreamInEnabled = m_vdencEnabled && (m_16xMeSupported || m_staticFrameDetectionInUse);
1716         surfaceParam.meVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1717         surfaceParam.vdencStreamInSurfaceSize = MOS_BYTES_TO_DWORDS(m_picHeightInMb * m_picWidthInMb * 64);
1718 
1719         if (m_hmeKernel->Is16xMeEnabled())
1720         {
1721             m_lastTaskInPhase = false;
1722             if (m_hmeKernel->Is32xMeEnabled())
1723             {
1724                 surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb32x;
1725                 surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
1726                 surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset;
1727                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x));
1728             }
1729             surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb16x;
1730             surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x;
1731             surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset;
1732             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x));
1733         }
1734 
1735         // On-demand sync for VDEnc SHME StreamIn surface
1736         auto syncParams = g_cInitSyncParams;
1737         syncParams.GpuContext = m_renderContext;
1738         syncParams.presSyncResource = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1739 
1740         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
1741         m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
1742 
1743         // HME StreamIn
1744         m_lastTaskInPhase = !m_staticFrameDetectionInUse;
1745 
1746         surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb4x;
1747         surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb4x;
1748         surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset;
1749         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x));
1750         m_vdencStreamInEnabled = true;
1751     }
1752     return MOS_STATUS_SUCCESS;
1753 }
1754 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)1755 MOS_STATUS CodechalVdencAvcStateG11::UpdateCmdBufAttribute(
1756     PMOS_COMMAND_BUFFER cmdBuffer,
1757     bool                renderEngineInUse)
1758 {
1759     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1760 
1761     // should not be there. Will remove it in the next change
1762     CODECHAL_ENCODE_FUNCTION_ENTER;
1763     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
1764     {
1765         PMOS_CMD_BUF_ATTRI_VE attriExt =
1766             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
1767 
1768         memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
1769         attriExt->bUseVirtualEngineHint =
1770             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
1771     }
1772 
1773     return eStatus;
1774 }
1775 
1776 #if USE_CODECHAL_DEBUG_TOOL
PopulateBrcInitParam(void * cmd)1777 MOS_STATUS CodechalVdencAvcStateG11::PopulateBrcInitParam(
1778     void *cmd)
1779 {
1780     CODECHAL_DEBUG_FUNCTION_ENTER;
1781 
1782     CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1783 
1784     if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1785     {
1786         return MOS_STATUS_SUCCESS;
1787     }
1788 
1789     BrcInitDmem * dmem = (BrcInitDmem *)cmd;
1790 
1791     if (m_pictureCodingType == I_TYPE)
1792     {
1793         m_avcPar->MBBRCEnable                    = m_mbBrcEnabled;
1794         m_avcPar->MBRC                           = m_mbBrcEnabled;
1795         m_avcPar->BitRate                        = dmem->INIT_TargetBitrate_U32;
1796         m_avcPar->InitVbvFullnessInBit           = dmem->INIT_InitBufFull_U32;
1797         m_avcPar->MaxBitRate                     = dmem->INIT_MaxRate_U32;
1798         m_avcPar->VbvSzInBit                     = dmem->INIT_BufSize_U32;
1799         m_avcPar->UserMaxFrame                   = dmem->INIT_ProfileLevelMaxFrame_U32;
1800         m_avcPar->SlidingWindowEnable            = dmem->INIT_SlidingWidowRCEnable_U8;
1801         m_avcPar->SlidingWindowSize              = dmem->INIT_SlidingWindowSize_U8;
1802         m_avcPar->SlidingWindowMaxRateRatio      = dmem->INIT_SlidingWindowMaxRateRatio_U8;
1803         m_avcPar->LowDelayGoldenFrameBoost       = dmem->INIT_LowDelayGoldenFrameBoost_U8;
1804         m_avcPar->TopQPDeltaThrforAdaptive2Pass  = dmem->INIT_TopQPDeltaThrForAdapt2Pass_U8;
1805         m_avcPar->BotQPDeltaThrforAdaptive2Pass  = dmem->INIT_BotQPDeltaThrForAdapt2Pass_U8;
1806         m_avcPar->TopFrmSzPctThrforAdaptive2Pass = dmem->INIT_TopFrmSzThrForAdapt2Pass_U8;
1807         m_avcPar->BotFrmSzPctThrforAdaptive2Pass = dmem->INIT_BotFrmSzThrForAdapt2Pass_U8;
1808         m_avcPar->MBHeaderCompensation           = dmem->INIT_MBHeaderCompensation_U8;
1809         m_avcPar->QPSelectMethodforFirstPass     = dmem->INIT_QPSelectForFirstPass_U8;
1810         m_avcPar->MBQpCtrl                       = (dmem->INIT_MbQpCtrl_U8 > 0) ? true : false;
1811         m_avcPar->QPMax                          = dmem->INIT_MaxQP_U16;
1812         m_avcPar->QPMin                          = dmem->INIT_MinQP_U16;
1813         m_avcPar->HrdConformanceCheckDisable     = (dmem->INIT_HRDConformanceCheckDisable_U8 > 0) ? true : false;
1814         m_avcPar->ICQReEncode                    = (dmem->INIT_ICQReEncode_U8 > 0) ? true : false;
1815         m_avcPar->AdaptiveCostAdjustEnable       = (dmem->INIT_AdaptiveCostEnable_U8 > 0) ? true : false;
1816         m_avcPar->AdaptiveHMEExtension           = (dmem->INIT_AdaptiveHMEExtensionEnable_U8 > 0) ? true : false;
1817         m_avcPar->StreamInStaticRegion           = dmem->INIT_StaticRegionStreamIn_U8;
1818         ;
1819         m_avcPar->ScenarioInfo = dmem->INIT_ScenarioInfo_U8;
1820         ;
1821     }
1822 
1823     return MOS_STATUS_SUCCESS;
1824 }
1825 
PopulateBrcUpdateParam(void * cmd)1826 MOS_STATUS CodechalVdencAvcStateG11::PopulateBrcUpdateParam(
1827     void *cmd)
1828 {
1829     CODECHAL_DEBUG_FUNCTION_ENTER;
1830 
1831     CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1832 
1833     if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1834     {
1835         return MOS_STATUS_SUCCESS;
1836     }
1837 
1838     BrcUpdateDmem * dmem = (BrcUpdateDmem *)cmd;
1839 
1840     if (m_pictureCodingType == I_TYPE)
1841     {
1842         m_avcPar->EnableMultipass            = (dmem->UPD_MaxNumPass_U8 > 0) ? true : false;
1843         m_avcPar->MaxNumPakPasses            = dmem->UPD_MaxNumPass_U8;
1844         m_avcPar->SceneChgDetectEn           = (dmem->UPD_SceneChgDetectEn_U8 > 0) ? true : false;
1845         m_avcPar->SceneChgPrevIntraPctThresh = dmem->UPD_SceneChgPrevIntraPctThreshold_U8;
1846         m_avcPar->SceneChgCurIntraPctThresh  = dmem->UPD_SceneChgCurIntraPctThreshold_U8;
1847         m_avcPar->SceneChgWidth0             = dmem->UPD_SceneChgWidth_U8[0];
1848         m_avcPar->SceneChgWidth1             = dmem->UPD_SceneChgWidth_U8[1];
1849         m_avcPar->SliceSizeThr               = dmem->UPD_SLCSZ_TARGETSLCSZ_U16;
1850         m_avcPar->SliceMaxSize               = dmem->UPD_TargetSliceSize_U16;
1851     }
1852     else if (m_pictureCodingType == P_TYPE)
1853     {
1854         m_avcPar->Transform8x8PDisable = (dmem->UPD_DisablePFrame8x8Transform_U8 > 0) ? true : false;
1855     }
1856 
1857     return MOS_STATUS_SUCCESS;
1858 }
1859 
PopulateEncParam(uint8_t meMethod,void * cmd)1860 MOS_STATUS CodechalVdencAvcStateG11::PopulateEncParam(
1861     uint8_t meMethod,
1862     void    *cmd)
1863 {
1864     CODECHAL_DEBUG_FUNCTION_ENTER;
1865 
1866     CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1867 
1868     if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1869     {
1870         return MOS_STATUS_SUCCESS;
1871     }
1872 
1873     uint8_t         *data = nullptr;
1874     MOS_LOCK_PARAMS lockFlags;
1875     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1876     lockFlags.ReadOnly = 1;
1877 
1878     if (m_vdencBrcEnabled)
1879     {
1880         // BRC case: VDENC IMG STATE is updated by HuC FW
1881         data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx], &lockFlags);
1882         data = data + mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD::byteSize;
1883     }
1884     else
1885     {
1886         // CQP case: VDENC IMG STATE is updated by driver or SFD kernel
1887         if (!m_staticFrameDetectionInUse)
1888         {
1889             data = m_batchBufferForVdencImgStat[m_currRecycledBufIdx].pData;
1890             data = data + mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD::byteSize;
1891         }
1892         else
1893         {
1894             data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resVdencSfdImageStateReadBuffer, &lockFlags);
1895         }
1896     }
1897 
1898     CODECHAL_DEBUG_CHK_NULL(data);
1899 
1900     mhw_vdbox_vdenc_g11_X::VDENC_IMG_STATE_CMD vdencCmd;
1901     vdencCmd = *(mhw_vdbox_vdenc_g11_X::VDENC_IMG_STATE_CMD *)(data);
1902 
1903     if (m_pictureCodingType == I_TYPE)
1904     {
1905         m_avcPar->BlockBasedSkip = vdencCmd.DW4.BlockBasedSkipEnabled;
1906         m_avcPar->VDEncPerfMode  = vdencCmd.DW1.VdencPerfmode;
1907     }
1908     else if (m_pictureCodingType == P_TYPE)
1909     {
1910         m_avcPar->SubPelMode            = vdencCmd.DW4.SubPelMode;
1911         m_avcPar->FTQBasedSkip          = vdencCmd.DW4.ForwardTransformSkipCheckEnable;
1912         m_avcPar->BiMixDisable          = vdencCmd.DW1.BidirectionalMixDisable;
1913         m_avcPar->SurvivedSkipCost      = (vdencCmd.DW8.NonSkipZeroMvCostAdded << 1) + vdencCmd.DW8.NonSkipMbModeCostAdded;
1914         m_avcPar->UniMixDisable         = vdencCmd.DW2.UnidirectionalMixDisable;
1915         m_avcPar->VdencExtPakObjDisable = !vdencCmd.DW1.VdencExtendedPakObjCmdEnable;
1916         m_avcPar->PPMVDisable           = vdencCmd.DW34.PpmvDisable;
1917     }
1918 
1919     if (data)
1920     {
1921         if (m_vdencBrcEnabled)
1922         {
1923             m_osInterface->pfnUnlockResource(
1924                 m_osInterface,
1925                 &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx]);
1926         }
1927         else
1928         {
1929             if (m_staticFrameDetectionInUse)
1930             {
1931                 m_osInterface->pfnUnlockResource(
1932                     m_osInterface,
1933                     &m_resVdencSfdImageStateReadBuffer);
1934             }
1935         }
1936     }
1937 
1938     return MOS_STATUS_SUCCESS;
1939 }
1940 
PopulatePakParam(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER secondLevelBatchBuffer)1941 MOS_STATUS CodechalVdencAvcStateG11::PopulatePakParam(
1942     PMOS_COMMAND_BUFFER cmdBuffer,
1943     PMHW_BATCH_BUFFER   secondLevelBatchBuffer)
1944 {
1945     CODECHAL_DEBUG_FUNCTION_ENTER;
1946 
1947     CODECHAL_DEBUG_CHK_NULL(m_debugInterface);
1948 
1949     if (!m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar))
1950     {
1951         return MOS_STATUS_SUCCESS;
1952     }
1953 
1954     uint8_t         *data = nullptr;
1955     MOS_LOCK_PARAMS lockFlags;
1956     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1957     lockFlags.ReadOnly = 1;
1958 
1959     if (cmdBuffer != nullptr)
1960     {
1961         data = (uint8_t*)(cmdBuffer->pCmdPtr - (mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD::byteSize / sizeof(uint32_t)));
1962     }
1963     else if (secondLevelBatchBuffer != nullptr)
1964     {
1965         data = secondLevelBatchBuffer->pData;
1966     }
1967     else
1968     {
1969         data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx], &lockFlags);
1970     }
1971 
1972     CODECHAL_DEBUG_CHK_NULL(data);
1973 
1974     mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD mfxCmd;
1975     mfxCmd = *(mhw_vdbox_mfx_g11_X::MFX_AVC_IMG_STATE_CMD *)(data);
1976 
1977     if (m_pictureCodingType == I_TYPE)
1978     {
1979         m_avcPar->TrellisQuantizationEnable         = mfxCmd.DW5.TrellisQuantizationEnabledTqenb;
1980         m_avcPar->EnableAdaptiveTrellisQuantization = mfxCmd.DW5.TrellisQuantizationEnabledTqenb;
1981         m_avcPar->TrellisQuantizationRounding       = mfxCmd.DW5.TrellisQuantizationRoundingTqr;
1982         m_avcPar->TrellisQuantizationChromaDisable  = mfxCmd.DW5.TrellisQuantizationChromaDisableTqchromadisable;
1983         m_avcPar->ExtendedRhoDomainEn               = mfxCmd.DW17.ExtendedRhodomainStatisticsEnable;
1984     }
1985 
1986     if (data && (cmdBuffer == nullptr) && (secondLevelBatchBuffer == nullptr))
1987     {
1988         m_osInterface->pfnUnlockResource(
1989             m_osInterface,
1990             &m_resVdencBrcImageStatesReadBuffer[m_currRecycledBufIdx]);
1991     }
1992 
1993     return MOS_STATUS_SUCCESS;
1994 }
1995 #endif
1996