1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_vdenc_hevc_g12.cpp
24 //! \brief    HEVC VDEnc encoder for GEN12.
25 //!
26 
27 #include "codechal_vdenc_hevc_g12.h"
28 #include "codechal_kernel_header_g12.h"
29 #include "codeckrnheader.h"
30 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
31 #include "igcodeckrn_g12.h"
32 #endif
33 #include "mhw_vdbox_g12_X.h"
34 #include "mhw_vdbox_hcp_g12_X.h"
35 #include "mhw_vdbox_vdenc_g12_X.h"
36 #include "mhw_mi_g12_X.h"
37 #include "mhw_render_g12_X.h"
38 #include "codechal_mmc_encode_hevc_g12.h"
39 #include "mhw_mmio_g12.h"
40 #include "hal_oca_interface.h"
41 #ifdef _ENCODE_VDENC_RESERVED
42 #include "codechal_debug_encode_brc.h"
43 #endif
44 const uint32_t CodechalVdencHevcStateG12::m_VdboxVDENCRegBase[4] = M_VDBOX_VDENC_REG_BASE;
45 
46 const double CodechalVdencHevcStateG12::m_devThreshIFPNEG[] = {
47     0.80, 0.60, 0.34, 0.2,
48 };
49 
50 const double CodechalVdencHevcStateG12::m_devThreshIFPPOS[] = {
51     0.2, 0.4 , 0.66, 0.9,
52 };
53 
54 const double CodechalVdencHevcStateG12::m_devThreshPBFPNEG[] = {
55     0.90, 0.66, 0.46, 0.3,
56 };
57 
58 const double CodechalVdencHevcStateG12::m_devThreshPBFPPOS[] = {
59     0.3, 0.46, 0.70, 0.90,
60 };
61 
62 const double CodechalVdencHevcStateG12::m_devThreshVBRNEG[] = {
63     0.90, 0.70, 0.50, 0.3,
64 };
65 
66 const double CodechalVdencHevcStateG12::m_devThreshVBRPOS[] = {
67     0.4, 0.5, 0.75, 0.90,
68 };
69 
70 const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshPB[] = {
71     -45, -33, -23, -15, -8, 0, 15, 25,
72 };
73 const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshVBR[] = {
74     -45, -35, -25, -15, -8, 0, 20, 40,
75 };
76 const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshI[] = {
77     -40, -30, -17, -10, -5, 0, 10, 20,
78 };
79 
80 const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszI[][8] = {
81     { 0,  0, -8, -12, -16, -20, -28, -36 },
82     { 0,  0, -4, -8, -12,  -16, -24, -32 },
83     { 4,  2,  0, -1, -3,  -8, -16, -24 },
84     { 8,  4,  2,  0, -1,  -4,  -8, -16 },
85     { 20, 16,  4,  0, -1,  -4,  -8, -16 },
86     { 24, 20, 16,  8,  4,   0,  -4, -8 },
87     { 28, 24, 20, 16,  8,   4,  0, -8 },
88     { 32, 24, 20, 16, 8,   4,   0, -4 },
89     { 64, 48, 28, 20, 16,  12,  8,  4 },
90 };
91 
92 const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszP[][8] = {
93     { -8,  -24, -32, -40, -44, -48, -52, -80 },
94     { -8,  -16, -32, -40, -40,  -44, -44, -56 },
95     { 0,    0,  -12, -20, -24,  -28, -32, -36 },
96     { 8,   4,  0,   0,    -8,   -16,  -24, -32 },
97     { 32,  16,  8, 4,    -4,   -8,  -16,  -20 },
98     { 36,  24,  16, 8,    4,    -2,  -4, -8 },
99     { 40, 36, 24,   20, 16,  8,  0, -8 },
100     { 48, 40, 28,  24, 20,  12,  0, -4 },
101     { 64, 48, 28, 20, 16,  12,  8,  4 },
102 };
103 
104 const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszB[][8] = {
105     { 0, -4, -8, -16, -24, -32, -40, -48 },
106     { 1,  0, -4, -8, -16,  -24, -32, -40 },
107     { 4,  2,  0, -1, -3,  -8, -16, -24 },
108     { 8,  4,  2,  0, -1,  -4,  -8, -16 },
109     { 20, 16,  4,  0, -1,  -4,  -8, -16 },
110     { 24, 20, 16,  8,  4,   0,  -4, -8 },
111     { 28, 24, 20, 16,  8,   4,  0, -8 },
112     { 32, 24, 20, 16, 8,   4,   0, -4 },
113     { 64, 48, 28, 20, 16,  12,  8,  4 },
114 };
115 
116 const uint32_t CodechalVdencHevcStateG12::m_hucConstantData[]  = {
117     0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c, 0x012c012c, 0x012c012c,
118     0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00640064,
119     0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
120     0x00640064, 0x00640064, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c,
121     0x012c012c, 0x012c012c, 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8,
122     0x00c800c8, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
123     0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x503c1e04, 0xffc88c78, 0x3c1e0400, 0xc88c7850,
124     0x140200ff, 0xa0824628, 0x0000ffc8, 0x00000000, 0x04030302, 0x00000000, 0x03030200, 0x0000ff04,
125     0x02020000, 0xffff0303, 0x01000000, 0xff020202, 0x0000ffff, 0x02020100, 0x00fffffe, 0x01010000,
126     0xfffffe02, 0x010000ff, 0xfefe0201, 0x0000ffff, 0xfe010100, 0x00fffffe, 0x01010000, 0x00000000,
127     0x03030200, 0x00000004, 0x03020000, 0x00ff0403, 0x02000000, 0xff030302, 0x000000ff, 0x02020201,
128     0x00ffffff, 0x02010000, 0xfffffe02, 0x01000000, 0xfffe0201, 0x0000ffff, 0xfe020101, 0x00fffffe,
129     0x01010000, 0xfffffefe, 0x01000000, 0x00000001, 0x03020000, 0x00000403, 0x02000000, 0xff040303,
130     0x00000000, 0x03030202, 0x0000ffff, 0x02020100, 0xffffff02, 0x01000000, 0xfffe0202, 0x000000ff,
131     0xfe020101, 0x00ffffff, 0x02010100, 0xfffffefe, 0x01000000, 0xfffefe01, 0x000000ff, 0xe0e00101,
132     0xc0d0d0d0, 0xe0e0b0c0, 0xd0d0d0e0, 0xf0f0c0d0, 0xd0e0e0e0, 0x0408d0d0, 0xe8f0f800, 0x1820dce0,
133     0xf8fc0210, 0x2024ecf0, 0x0008101c, 0x2428f8fc, 0x08101418, 0x2830f800, 0x0c14181c, 0x3040fc00,
134     0x0c10141c, 0xe8f80408, 0xc8d0d4e0, 0xf0f8b0c0, 0xccd4d8e0, 0x0000c0c8, 0xd8dce4f0, 0x0408d0d4,
135     0xf0f80000, 0x0808dce8, 0xf0f80004, 0x0810dce8, 0x00080808, 0x0810f8fc, 0x08080808, 0x1010f800,
136     0x08080808, 0x1020fc00, 0x08080810, 0xfc000408, 0xe0e8f0f8, 0x0001d0d8, 0xe8f0f8fc, 0x0204d8e0,
137     0xf8fdff00, 0x0408e8f0, 0xfcff0002, 0x1014f0f8, 0xfcff0004, 0x1418f0f8, 0x00040810, 0x181cf8fc,
138     0x04081014, 0x1820f800, 0x04081014, 0x3040fc00, 0x0c10141c, 0x40300408, 0x80706050, 0x30a0a090,
139     0x70605040, 0xa0a09080, 0x60504030, 0xa0908070, 0x040201a0, 0x18141008, 0x02012420, 0x0a080604,
140     0x01101010, 0x0c080402, 0x10101010, 0x05030201, 0x02010106, 0x00000503, 0xff030201, 0x02010000,
141     0x000000ff, 0xfffefe01, 0xfdfd0100, 0xfb00ffff, 0xfffffefd, 0xfefdfbfa, 0x030201ff, 0x01010605,
142     0x00050302, 0x03020101, 0x010000ff, 0x0000ff02, 0xffff0100, 0xfe0100ff, 0x00ffffff, 0xfffffefc,
143     0xfefcfb00, 0x0101ffff, 0x01050402, 0x04020101, 0x01010000, 0x0000ff02, 0x00ff0101, 0xff000000,
144     0x0100ffff, 0xfffffffe, 0xfffefd00, 0xfcfb00ff, 0x1efffffe, 0x070d0e10, 0x00003207, 0x00000000,
145     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
146     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
147     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
148     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
149     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
150     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
151     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
152     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
153     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
154     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
155     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
156     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
157     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
158     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
159     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
160     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
161     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
162     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
163     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
164     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
165     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
166     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
167     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
168     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
169     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
170     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
171     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
172     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
173     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
174     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
175     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
176     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
177     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
178     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
179     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
180     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
181     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
182     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
183     0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
184     0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
185     0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
186     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
187     0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
188     0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
189     0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
190     0x00000000, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
191     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
192     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
193     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
194     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
195     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
196     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
197     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
198     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
199     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
200     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
201     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
202     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
203     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
204     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
205     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
206     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
207     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
208     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
209     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
210     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
211     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
212     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
213     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
214     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
215     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
216     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
217     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
218     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
219     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
220     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
221     0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
222     0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
223     0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
224     0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
225     0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
226     0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
227     0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
228     0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
229     0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff
230 };
231 
GetMaxBtCount()232 uint32_t CodechalVdencHevcStateG12::GetMaxBtCount()
233 {
234     CODECHAL_ENCODE_FUNCTION_ENTER;
235 
236     uint32_t maxBtCount = 0;
237 
238 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
239     auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
240 
241     // DsConversion kernel
242     maxBtCount = (m_32xMeSupported ? 3 : 2) * (MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment));
243 
244     // add ME and stream-in kernel
245     if(m_b16XMeEnabled)
246     {
247         MHW_KERNEL_STATE kernelState = m_lowDelay ? m_vdencMeKernelState : m_vdencMeKernelStateRAB;
248         if(m_b32XMeEnabled)
249         {
250             maxBtCount += MOS_ALIGN_CEIL(kernelState.KernelParams.iBTCount, btIdxAlignment);
251         }
252         maxBtCount += MOS_ALIGN_CEIL(kernelState.KernelParams.iBTCount, btIdxAlignment);
253 
254         kernelState = m_lowDelay ? m_vdencStreaminKernelState : m_vdencStreaminKernelStateRAB;
255         maxBtCount += MOS_ALIGN_CEIL(kernelState.KernelParams.iBTCount, btIdxAlignment);
256     }
257 #endif
258 
259     return maxBtCount;
260 }
261 
InitKernelStateMe()262 MOS_STATUS CodechalVdencHevcStateG12::InitKernelStateMe()
263 {
264     CODECHAL_ENCODE_FUNCTION_ENTER;
265 
266     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
267 
268     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
269     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
270 
271     uint32_t kernelSize = m_combinedKernelSize;
272     CODECHAL_KERNEL_HEADER currKrnHeader;
273     CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
274         m_kernelBinary,
275         VDENC_ME_P,
276         0,
277         &currKrnHeader,
278         &kernelSize));
279 
280     auto kernelStatePtr = &m_vdencMeKernelState;
281     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
282         VDENC_ME_P,
283         &kernelStatePtr->KernelParams));
284 
285     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
286         VDENC_ME_P,
287         &m_vdencMeKernelBindingTable));
288 
289     kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
290     kernelStatePtr->KernelParams.pBinary =
291         m_kernelBinary +
292         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
293     kernelStatePtr->KernelParams.iSize = kernelSize;
294 
295     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
296         m_stateHeapInterface,
297         kernelStatePtr->KernelParams.iBTCount,
298         &kernelStatePtr->dwSshSize,
299         &kernelStatePtr->dwBindingTableSize));
300 
301     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
302 
303     CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
304         m_kernelBinary,
305         VDENC_ME_B,
306         0,
307         &currKrnHeader,
308         &kernelSize));
309 
310     kernelStatePtr = &m_vdencMeKernelStateRAB;
311     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
312         VDENC_ME_B,
313         &kernelStatePtr->KernelParams));
314 
315     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
316         VDENC_ME_B,
317         &m_vdencStreaminKernelBindingTable));
318 
319     kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
320     kernelStatePtr->KernelParams.pBinary =
321         m_kernelBinary +
322         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
323     kernelStatePtr->KernelParams.iSize = kernelSize;
324 
325     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
326         m_stateHeapInterface,
327         kernelStatePtr->KernelParams.iBTCount,
328         &kernelStatePtr->dwSshSize,
329         &kernelStatePtr->dwBindingTableSize));
330 
331     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
332 
333     return eStatus;
334 }
335 
InitKernelStateStreamIn()336 MOS_STATUS CodechalVdencHevcStateG12::InitKernelStateStreamIn()
337 {
338     CODECHAL_ENCODE_FUNCTION_ENTER;
339     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
340 
341     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
342     CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
343 
344     uint32_t kernelSize = m_combinedKernelSize;
345     CODECHAL_KERNEL_HEADER currKrnHeader;
346     CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
347         m_kernelBinary,
348         VDENC_STREAMIN_HEVC,
349         0,
350         &currKrnHeader,
351         &kernelSize));
352 
353     auto kernelStatePtr = &m_vdencStreaminKernelState;
354     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
355         VDENC_STREAMIN_HEVC,
356         &kernelStatePtr->KernelParams));
357 
358     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
359         VDENC_STREAMIN_HEVC,
360         &m_vdencStreaminKernelBindingTable));
361 
362     kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
363     kernelStatePtr->KernelParams.pBinary =
364         m_kernelBinary +
365         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
366     kernelStatePtr->KernelParams.iSize = kernelSize;
367 
368     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
369         m_stateHeapInterface,
370         kernelStatePtr->KernelParams.iBTCount,
371         &kernelStatePtr->dwSshSize,
372         &kernelStatePtr->dwBindingTableSize));
373 
374     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
375 
376     CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
377         m_kernelBinary,
378         VDENC_STREAMIN_HEVC_RAB,
379         0,
380         &currKrnHeader,
381         &kernelSize));
382 
383     kernelStatePtr = &m_vdencStreaminKernelStateRAB;
384     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
385         VDENC_STREAMIN_HEVC_RAB,
386         &kernelStatePtr->KernelParams));
387 
388     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
389         VDENC_STREAMIN_HEVC_RAB,
390         &m_vdencStreaminKernelBindingTable));
391 
392     kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
393     kernelStatePtr->KernelParams.pBinary =
394         m_kernelBinary +
395         (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
396     kernelStatePtr->KernelParams.iSize = kernelSize;
397 
398     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
399         m_stateHeapInterface,
400         kernelStatePtr->KernelParams.iBTCount,
401         &kernelStatePtr->dwSshSize,
402         &kernelStatePtr->dwBindingTableSize));
403 
404     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
405 
406     return eStatus;
407 }
408 
InitKernelState()409 MOS_STATUS CodechalVdencHevcStateG12::InitKernelState()
410 {
411     CODECHAL_ENCODE_FUNCTION_ENTER;
412 
413     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
414 
415 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
416     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
417     CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateStreamIn());
418 #endif
419 
420     return eStatus;
421 }
422 
DecideEncodingPipeNumber()423 MOS_STATUS CodechalVdencHevcStateG12::DecideEncodingPipeNumber()
424 {
425     CODECHAL_ENCODE_FUNCTION_ENTER;
426 
427     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
428 
429     m_numPipePre = m_numPipe;
430     m_numPipe = m_numVdbox;
431 
432     uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
433 
434     CODECHAL_ENCODE_VERBOSEMESSAGE("Tile Columns = %d.", numTileColumns);
435 
436     if (numTileColumns > m_numPipe)
437     {
438         // Streaming buffer does does work if numTileColumns > m_numPipe
439         if (m_hevcSeqParams->EnableStreamingBufferLLC || m_hevcSeqParams->EnableStreamingBufferDDR)
440         {
441             CODECHAL_ENCODE_ASSERTMESSAGE("Streaming buffer does does work if numTileColumns > m_numPipe!");
442             return MOS_STATUS_INVALID_PARAMETER;
443         }
444         m_numPipe = 1;
445     }
446 
447     if (numTileColumns < m_numPipe)
448     {
449         if (numTileColumns >= 1 && numTileColumns <= 4)
450         {
451             m_numPipe = numTileColumns;
452         }
453         else
454         {
455             m_numPipe = 1;  // invalid tile column test cases and switch back to the single VDBOX mode
456         }
457     }
458 
459     // Tile replay needs scalability enabled, Remove Resolution check for scalability
460 
461     m_useVirtualEngine = true;  // always use virtual engine interface for single pipe and scalability mode
462 
463     m_numUsedVdbox       = m_numPipe;
464     m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
465 
466     if (m_scalabilityState)
467     {
468         // Create/ re-use a GPU context with 2 pipes
469         m_scalabilityState->ucScalablePipeNum = m_numPipe;
470     }
471 
472     CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d, decided pipe num = %d.", m_numVdbox, m_numPipe);
473 
474     return eStatus;
475 }
476 
CheckSupportedFormat(PMOS_SURFACE surface)477 bool CodechalVdencHevcStateG12::CheckSupportedFormat(PMOS_SURFACE surface)
478 {
479     CODECHAL_ENCODE_FUNCTION_ENTER;
480 
481     bool isColorFormatSupported = false;
482 
483     if (nullptr == surface)
484     {
485         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
486         return isColorFormatSupported;
487     }
488 
489     switch (surface->Format)
490     {
491     case Format_NV12:
492     case Format_NV21:
493     case Format_P010:       // Planar 4:2:0
494     case Format_YUY2:
495     case Format_YUYV:
496     case Format_YVYU:
497     case Format_UYVY:
498     case Format_VYUY:
499     case Format_A8R8G8B8:
500     case Format_A8B8G8R8:
501     case Format_R10G10B10A2:// Packed RGB 4:4:4
502     case Format_B10G10R10A2:// Packed RGB 4:4:4
503     case Format_AYUV:
504     case Format_Y410:       // Packed 4:4:4
505         isColorFormatSupported = true;
506         break;
507     case Format_Y210:       // Packed 4:2:2
508         if (MEDIA_IS_WA(m_waTable, WaHEVCVDEncY210LinearInputNotSupported))
509         {
510             isColorFormatSupported = surface->TileType == MOS_TILE_Y;
511         }
512         else
513         {
514             isColorFormatSupported = true;
515         }
516         break;
517     default:
518         CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
519         break;
520     }
521 
522     return isColorFormatSupported;
523 }
524 
PlatformCapabilityCheck()525 MOS_STATUS CodechalVdencHevcStateG12::PlatformCapabilityCheck()
526 {
527     CODECHAL_ENCODE_FUNCTION_ENTER;
528 
529     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
530 
531     CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
532 
533     if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
534     {
535         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
536             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
537     }
538 
539     if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_16K_PIC_WIDTH * ENCODE_HEVC_MAX_16K_PIC_HEIGHT)
540     {
541         eStatus = MOS_STATUS_INVALID_PARAMETER;
542         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 16k not supported");
543     }
544 
545     if (m_hevcSeqParams->SliceSizeControl && m_frameWidth * m_frameHeight < ENCODE_HEVC_MIN_DSS_PIC_WIDTH * ENCODE_HEVC_MIN_DSS_PIC_HEIGHT)
546     {
547         eStatus = MOS_STATUS_INVALID_PARAMETER;
548         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "DSS is not supported when frame resolution less than 320p");
549     }
550 
551     if (m_hevcSeqParams->ParallelBRC)
552     {
553         eStatus = MOS_STATUS_INVALID_PARAMETER;
554         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Parallel BRC is not supported on VDENC");
555     }
556 
557     if (m_hevcSeqParams->bit_depth_luma_minus8 >= 4 || m_hevcSeqParams->bit_depth_chroma_minus8 >= 4)
558     {
559         eStatus = MOS_STATUS_INVALID_PARAMETER;
560         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "12bit encoding is not supported on VDENC");
561     }
562 
563     if (m_hevcSeqParams->chroma_format_idc == 2)
564     {
565         eStatus = MOS_STATUS_INVALID_PARAMETER;
566         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "422 recon format encoding is not supported on HEVC VDENC");
567     }
568 
569     // TU configuration for RDOQ
570     if (m_hevcRdoqEnabled)
571     {
572         m_hevcRdoqEnabled = (m_hevcSeqParams->TargetUsage < 7);
573     }
574 
575     // set RDOQ Intra blocks Threshold for Gen11+
576     m_rdoqIntraTuThreshold = 0;
577     if (m_hevcRdoqEnabled)
578     {
579         if (1 == m_hevcSeqParams->TargetUsage)
580         {
581             m_rdoqIntraTuThreshold = 0xffff;
582         }
583         else if (4 == m_hevcSeqParams->TargetUsage)
584         {
585             m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
586             m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
587         }
588     }
589 
590     if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
591     {
592         if (m_hevcPicParams->tiles_enabled_flag)
593         {
594             for (auto i = 0; i < m_hevcPicParams->num_tile_columns_minus1 + 1; i++)
595             {
596                 if (m_hevcPicParams->tile_column_width[i] < 5)
597                 {
598                     CODECHAL_ENCODE_ASSERTMESSAGE("SCC IBC mode can't support tile width < 5 LCU");
599                     return MOS_STATUS_PLATFORM_NOT_SUPPORTED;
600                 }
601             }
602         }
603         else
604         {
605             if (MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) < 5)
606             {
607                 CODECHAL_ENCODE_ASSERTMESSAGE("in tiling disabled case, SCC IBC mode can't support picture width < 5 LCU");
608                 return MOS_STATUS_PLATFORM_NOT_SUPPORTED;
609             }
610         }
611     }
612 
613     return eStatus;
614 }
615 
~CodechalVdencHevcStateG12()616 CodechalVdencHevcStateG12::~CodechalVdencHevcStateG12()
617 {
618     CODECHAL_ENCODE_FUNCTION_ENTER;
619 
620     if (m_scalabilityState)
621     {
622         MOS_FreeMemAndSetNull(m_scalabilityState);
623     }
624     //Note: virtual engine interface destroy is done in MOS layer
625 
626     CODECHAL_DEBUG_TOOL(
627         MOS_Delete(m_encodeParState);
628     )
629 #ifdef _ENCODE_VDENC_RESERVED
630     if (m_rsvdState)
631     {
632         MOS_Delete(m_rsvdState);
633         m_rsvdState = nullptr;
634     }
635 #endif
636     if(m_gpuCtxCreatOpt)
637     {
638         MOS_Delete(m_gpuCtxCreatOpt);
639         m_gpuCtxCreatOpt = nullptr;
640     }
641     return;
642 }
643 
AllocatePakResources()644 MOS_STATUS CodechalVdencHevcStateG12::AllocatePakResources()
645 {
646     CODECHAL_ENCODE_FUNCTION_ENTER;
647 
648     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
649 
650     uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
651     uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
652     m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
653 
654     const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE);        //assume smallest LCU to get max width
655     const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE);      //assume smallest LCU to get max height
656 
657     MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
658     MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
659     hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
660     hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
661     // We should move the buffer allocation to picture level if the size is dependent on LCU size
662     hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
663     hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
664     hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
665 
666     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
667     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
668     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
669     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
670     allocParamsForBufferLinear.Format = Format_Buffer;
671 
672     // Deblocking Filter Row Store Scratch data surface
673     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
674         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
675         &hcpBufSizeParam);
676 
677     if (eStatus != MOS_STATUS_SUCCESS)
678     {
679         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
680         return eStatus;
681     }
682 
683     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
684     allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
685 
686     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
687         m_osInterface,
688         &allocParamsForBufferLinear,
689         &m_resDeblockingFilterRowStoreScratchBuffer);
690 
691     if (eStatus != MOS_STATUS_SUCCESS)
692     {
693         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
694         return eStatus;
695     }
696 
697     // Deblocking Filter Tile Row Store Scratch data surface
698     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
699         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
700         &hcpBufSizeParam);
701 
702     if (eStatus != MOS_STATUS_SUCCESS)
703     {
704         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
705         return eStatus;
706     }
707 
708     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
709     allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
710 
711     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
712         m_osInterface,
713         &allocParamsForBufferLinear,
714         &m_resDeblockingFilterTileRowStoreScratchBuffer);
715 
716     if (eStatus != MOS_STATUS_SUCCESS)
717     {
718         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
719         return eStatus;
720     }
721 
722     // Deblocking Filter Column Row Store Scratch data surface
723     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
724         MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
725         &hcpBufSizeParam);
726 
727     if (eStatus != MOS_STATUS_SUCCESS)
728     {
729         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
730         return eStatus;
731     }
732 
733     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
734     allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
735 
736     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
737         m_osInterface,
738         &allocParamsForBufferLinear,
739         &m_resDeblockingFilterColumnRowStoreScratchBuffer);
740 
741     if (eStatus != MOS_STATUS_SUCCESS)
742     {
743         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
744         return eStatus;
745     }
746 
747     // Metadata Line buffer
748     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
749         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
750         &hcpBufSizeParam);
751 
752     if (eStatus != MOS_STATUS_SUCCESS)
753     {
754         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
755         return eStatus;
756     }
757 
758     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
759     allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
760 
761     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
762         m_osInterface,
763         &allocParamsForBufferLinear,
764         &m_resMetadataLineBuffer);
765 
766     if (eStatus != MOS_STATUS_SUCCESS)
767     {
768         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
769         return eStatus;
770     }
771 
772     // Metadata Tile Line buffer
773     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
774         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
775         &hcpBufSizeParam);
776 
777     if (eStatus != MOS_STATUS_SUCCESS)
778     {
779         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
780         return eStatus;
781     }
782 
783     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
784     allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
785 
786     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
787         m_osInterface,
788         &allocParamsForBufferLinear,
789         &m_resMetadataTileLineBuffer);
790 
791     if (eStatus != MOS_STATUS_SUCCESS)
792     {
793         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
794         return eStatus;
795     }
796 
797     // Metadata Tile Column buffer
798     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
799         MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
800         &hcpBufSizeParam);
801 
802     if (eStatus != MOS_STATUS_SUCCESS)
803     {
804         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
805         return eStatus;
806     }
807 
808     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
809     allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
810 
811     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
812         m_osInterface,
813         &allocParamsForBufferLinear,
814         &m_resMetadataTileColumnBuffer);
815 
816     if (eStatus != MOS_STATUS_SUCCESS)
817     {
818         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
819         return eStatus;
820     }
821 
822     // SAO Line buffer
823     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
824         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
825         &hcpBufSizeParam);
826 
827     if (eStatus != MOS_STATUS_SUCCESS)
828     {
829         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
830         return eStatus;
831     }
832 
833     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
834     allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
835 
836     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
837         m_osInterface,
838         &allocParamsForBufferLinear,
839         &m_resSaoLineBuffer);
840 
841     if (eStatus != MOS_STATUS_SUCCESS)
842     {
843         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
844         return eStatus;
845     }
846 
847     // SAO Tile Line buffer
848     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
849         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
850         &hcpBufSizeParam);
851 
852     if (eStatus != MOS_STATUS_SUCCESS)
853     {
854         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
855         return eStatus;
856     }
857 
858     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
859     allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
860 
861     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
862         m_osInterface,
863         &allocParamsForBufferLinear,
864         &m_resSaoTileLineBuffer);
865 
866     if (eStatus != MOS_STATUS_SUCCESS)
867     {
868         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
869         return eStatus;
870     }
871 
872     // SAO Tile Column buffer
873     eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
874         MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
875         &hcpBufSizeParam);
876 
877     if (eStatus != MOS_STATUS_SUCCESS)
878     {
879         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
880         return eStatus;
881     }
882 
883     allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
884     allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
885 
886     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
887         m_osInterface,
888         &allocParamsForBufferLinear,
889         &m_resSaoTileColumnBuffer);
890 
891     if (eStatus != MOS_STATUS_SUCCESS)
892     {
893         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
894         return eStatus;
895     }
896 
897     // Lcu ILDB StreamOut buffer
898     allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
899     allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
900 
901     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
902         m_osInterface,
903         &allocParamsForBufferLinear,
904         &m_resLcuIldbStreamOutBuffer);
905 
906     if (eStatus != MOS_STATUS_SUCCESS)
907     {
908         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
909         return eStatus;
910     }
911 
912     // Lcu Base Address buffer
913     // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
914     // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
915     // Align to page for HUC requirement
916     uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
917     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
918     allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
919 
920     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
921         m_osInterface,
922         &allocParamsForBufferLinear,
923         &m_resLcuBaseAddressBuffer);
924 
925     if (eStatus != MOS_STATUS_SUCCESS)
926     {
927         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
928         return eStatus;
929     }
930 
931     // SAO Row Store buffer
932     // Aligned to 4 for each tile column
933     uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
934     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(picWidthInMinLCU + 3 * maxTileColumn, 4) * 16;
935     allocParamsForBufferLinear.pBufName = "SaoRowStoreBuffer";
936 
937     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
938         m_osInterface,
939         &allocParamsForBufferLinear,
940         &m_vdencSAORowStoreBuffer);
941 
942     if (eStatus != MOS_STATUS_SUCCESS)
943     {
944         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO row store Buffer.");
945         return eStatus;
946     }
947 
948     // SAO StreamOut buffer
949     uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
950     //extra added size to cover tile enabled case, per tile width aligned to 4.  20: max tile column No.
951     size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
952     allocParamsForBufferLinear.dwBytes = size;
953     allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
954 
955     eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
956         m_osInterface,
957         &allocParamsForBufferLinear,
958         &m_resSaoStreamOutBuffer);
959 
960     if (eStatus != MOS_STATUS_SUCCESS)
961     {
962         CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
963         return eStatus;
964     }
965 
966     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
967     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
968     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
969     allocParamsForBufferLinear.Format = Format_Buffer;
970 
971     // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
972     size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE);  //Each tile has 9 cache size bytes of data, Align to page is HuC requirement
973     allocParamsForBufferLinear.dwBytes = size;
974     allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
975 
976     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
977         m_osInterface,
978         &allocParamsForBufferLinear,
979         &m_resFrameStatStreamOutBuffer),
980         "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
981 
982     // PAK Statistics buffer
983     size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
984     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
985         m_standard, size, 1, pakStats, "pakStats"));
986 
987     // Slice Count buffer 1 DW = 4 Bytes
988     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
989     allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
990 
991     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
992         m_osInterface,
993         &allocParamsForBufferLinear,
994         &m_sliceCountBuffer),
995         "Failed to create VDENC Slice Count Buffer");
996 
997     // VDEncMode Timer buffer 1 DW = 4 Bytes
998     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
999     allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
1000 
1001     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1002         m_osInterface,
1003         &allocParamsForBufferLinear,
1004         &m_vdencModeTimerBuffer),
1005         "Failed to create VDEncMode Timer Buffer");
1006 
1007     uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
1008     uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
1009     uint32_t frameWidthInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
1010     uint32_t frameHeightInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
1011     uint32_t maxTileColumns    = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
1012 
1013     // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
1014     // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1015     size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
1016     allocParamsForBufferLinear.dwBytes = size;
1017     allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
1018 
1019     CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1020         m_osInterface,
1021         &allocParamsForBufferLinear,
1022         &m_resPakcuLevelStreamoutData.sResource));
1023     m_resPakcuLevelStreamoutData.dwSize = size;
1024     CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
1025 
1026     // these 2 buffers are not used so far, but put the correct size calculation here
1027     // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
1028     // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1029     // size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
1030 
1031     // PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
1032     // one LCU has one cache line. Use CU as LCU during creation
1033     // size = frameWidthInLcus * frameHeightInLcus * CODECHAL_CACHELINE_SIZE;
1034 
1035     // Allocate SSE Source Pixel Row Store Buffer
1036     m_sizeOfSseSrcPixelRowStoreBufferPerLcu = CODECHAL_CACHELINE_SIZE * (4 + 4) << 1;
1037     allocParamsForBufferLinear.dwBytes      = 2 * m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_widthAlignedMaxLcu + 3 * maxTileColumns);
1038     allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
1039 
1040     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1041                                                   m_osInterface,
1042                                                   &allocParamsForBufferLinear,
1043                                                   &m_resSseSrcPixelRowStoreBuffer),
1044         "Failed to create SseSrcPixelRowStoreBuffer");
1045 
1046     //HCP scalability Sync buffer
1047     allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
1048     allocParamsForBufferLinear.pBufName = "GEN11 HCP scalability Sync buffer ";
1049 
1050     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1051                                                   m_osInterface,
1052                                                   &allocParamsForBufferLinear,
1053                                                   &m_resHcpScalabilitySyncBuffer.sResource),
1054         "Failed to create GEN11 HCP scalability Sync Buffer");
1055 
1056     // create the tile coding state parameters
1057     for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1058     {
1059         m_tileParams[i] = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(
1060             sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)* m_maxTileNumber);
1061     }
1062 
1063     if (m_enableHWSemaphore)
1064     {
1065         // Create the HW sync objects which will be used by each reference frame and BRC in GEN11
1066         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1067         allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
1068 
1069         MOS_LOCK_PARAMS lockFlagsWriteOnly;
1070         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1071         lockFlagsWriteOnly.WriteOnly = 1;
1072 
1073         uint32_t* data = nullptr;
1074 
1075         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1076         {
1077             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1078                                                           m_osInterface,
1079                                                           &allocParamsForBufferLinear,
1080                                                           &m_refSync[i].resSemaphoreMem.sResource),
1081                 "Failed to create HW Semaphore Memory.");
1082             m_refSync[i].resSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
1083 
1084             CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1085                                                 m_osInterface,
1086                                                 &m_refSync[i].resSemaphoreMem.sResource,
1087                                                 &lockFlagsWriteOnly));
1088 
1089             *data = 1;
1090 
1091             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1092                 m_osInterface,
1093                 &m_refSync[i].resSemaphoreMem.sResource));
1094         }
1095     }
1096 
1097     // create the HW semaphore buffer to sync up between VDBOXes. This is used to WA HW internal lock issue
1098     if (m_enableVdBoxHWSemaphore)
1099     {
1100         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1101         allocParamsForBufferLinear.pBufName = "VDBOX SemaphoreMemory";
1102 
1103         MOS_LOCK_PARAMS lockFlagsWriteOnly;
1104         MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1105         lockFlagsWriteOnly.WriteOnly = 1;
1106 
1107         uint32_t* data = nullptr;
1108 
1109         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1110         {
1111             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1112                                                           m_osInterface,
1113                                                           &allocParamsForBufferLinear,
1114                                                           &m_resVdBoxSemaphoreMem[i].sResource),
1115                 "Failed to create VDBOX HW Semaphore Memory.");
1116 
1117             CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1118                                                 m_osInterface,
1119                                                 &m_resVdBoxSemaphoreMem[i].sResource,
1120                                                 &lockFlagsWriteOnly));
1121 
1122             *data = 0;
1123 
1124             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1125                 m_osInterface,
1126                 &m_resVdBoxSemaphoreMem[i].sResource));
1127         }
1128 
1129         for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resPipeStartSemaMem); i++)
1130         {
1131             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1132                                                           m_osInterface,
1133                                                           &allocParamsForBufferLinear,
1134                                                           &m_resPipeStartSemaMem[i].sResource),
1135                 "Failed to create VDBOX HW Semaphore Memory.");
1136 
1137             CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1138                                                 m_osInterface,
1139                                                 &m_resPipeStartSemaMem[i].sResource,
1140                                                 &lockFlagsWriteOnly));
1141 
1142             *data = 0;
1143 
1144             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1145                 m_osInterface,
1146                 &m_resPipeStartSemaMem[i].sResource));
1147         }
1148     }
1149 
1150     uint32_t* data = nullptr;
1151     MOS_LOCK_PARAMS lockFlagsWriteOnly;
1152     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1153     lockFlagsWriteOnly.WriteOnly = 1;
1154 
1155     allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1156     allocParamsForBufferLinear.pBufName = "BrcPakSemaphoreMemory";
1157 
1158     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1159                                                   m_osInterface,
1160                                                   &allocParamsForBufferLinear,
1161                                                   &m_resBrcPakSemaphoreMem.sResource),
1162         "Failed to create BRC PAK Semaphore Memory.");
1163 
1164     CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1165                                         m_osInterface,
1166                                         &m_resBrcPakSemaphoreMem.sResource,
1167                                         &lockFlagsWriteOnly));
1168 
1169     *data = 0;
1170 
1171     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1172         m_osInterface,
1173         &m_resBrcPakSemaphoreMem.sResource));
1174 
1175     // 3rd level batch buffer
1176     // To be moved to a more proper place later
1177     MOS_ZeroMemory(&m_thirdLevelBatchBuffer, sizeof(m_thirdLevelBatchBuffer));
1178     m_thirdLevelBatchBuffer.bSecondLevel = true;
1179     CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1180         m_osInterface,
1181         &m_thirdLevelBatchBuffer,
1182         nullptr,
1183         m_thirdLBSize));
1184 
1185     if (m_enableTileStitchByHW)
1186     {
1187         if (Mos_ResourceIsNull(&m_resHucStatus2Buffer))
1188         {
1189             // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1190             allocParamsForBufferLinear.dwBytes = sizeof(uint64_t);
1191             allocParamsForBufferLinear.pBufName    = "HUC STATUS 2 Buffer";
1192             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1193                 m_osInterface->pfnAllocateResource(
1194                     m_osInterface,
1195                     &allocParamsForBufferLinear,
1196                     &m_resHucStatus2Buffer),
1197                 "%s: Failed to allocate HUC STATUS 2 Buffer\n",
1198                 __FUNCTION__);
1199         }
1200         uint8_t *data;
1201         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1202         {
1203             for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1204             {
1205                 // HuC stitching Data buffer
1206                 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandDataVdencG12), CODECHAL_PAGE_SIZE);
1207                 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
1208                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1209                     m_osInterface->pfnAllocateResource(
1210                         m_osInterface,
1211                         &allocParamsForBufferLinear,
1212                         &m_resHucStitchDataBuffer[i][j]));
1213                 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1214                 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1215                 lockFlagsWriteOnly.WriteOnly = 1;
1216                 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
1217                     m_osInterface,
1218                     &m_resHucStitchDataBuffer[i][j],
1219                     &lockFlagsWriteOnly);
1220                 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
1221                 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
1222                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1223             }
1224         }
1225         //Second level BB for huc stitching cmd
1226         MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
1227         m_HucStitchCmdBatchBuffer.bSecondLevel = true;
1228         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1229             m_osInterface,
1230             &m_HucStitchCmdBatchBuffer,
1231             nullptr,
1232             m_hwInterface->m_HucStitchCmdBatchBufferSize));
1233     }
1234 
1235     if (m_numDelay)
1236     {
1237         allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1238         allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1239 
1240         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1241             m_osInterface,
1242             &allocParamsForBufferLinear,
1243             &m_resDelayMinus), "Failed to allocate delay minus memory.");
1244 
1245         uint8_t* data;
1246         MOS_LOCK_PARAMS lockFlags;
1247         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1248         lockFlags.WriteOnly = 1;
1249         data = (uint8_t*)m_osInterface->pfnLockResource(
1250             m_osInterface,
1251             &m_resDelayMinus,
1252             &lockFlags);
1253 
1254         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1255 
1256         MOS_ZeroMemory(data, sizeof(uint32_t));
1257 
1258         m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1259     }
1260 
1261     return eStatus;
1262 }
1263 
FreePakResources()1264 MOS_STATUS CodechalVdencHevcStateG12::FreePakResources()
1265 {
1266     CODECHAL_ENCODE_FUNCTION_ENTER;
1267 
1268     m_osInterface->pfnFreeResource(m_osInterface, &m_resSseSrcPixelRowStoreBuffer);
1269     m_osInterface->pfnFreeResource(m_osInterface, &m_resHcpScalabilitySyncBuffer.sResource);
1270     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencSAORowStoreBuffer);
1271     m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
1272     if (!Mos_ResourceIsNull(&m_resHwCountTileReplay))
1273     {
1274         m_osInterface->pfnFreeResource(m_osInterface, &m_resHwCountTileReplay);
1275     }
1276 
1277     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1278     {
1279         m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1280     }
1281     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1282     {
1283         m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1284     }
1285     m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1286 
1287     m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1288 
1289     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1290     {
1291         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1292         {
1293             m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
1294         }
1295     }
1296 
1297     if (m_numDelay)
1298     {
1299         m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
1300     }
1301 
1302     for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1303     {
1304         MOS_FreeMemory(m_tileParams[i]);
1305     }
1306 
1307     // command buffer for VE, allocated in MOS_STATUS CodechalEncodeHevcBase::VerifyCommandBufferSize()
1308     for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1309     {
1310         for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1311         {
1312             for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1313             {
1314                 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1315 
1316                 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
1317                 {
1318                     if (cmdBuffer->pCmdBase)
1319                     {
1320                         m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1321                     }
1322                     m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1323                 }
1324             }
1325         }
1326     }
1327 
1328     for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1329     {
1330         auto sync = &m_refSync[i];
1331 
1332         if (!Mos_ResourceIsNull(&sync->resSyncObject))
1333         {
1334             // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1335             if (sync->uiSemaphoreObjCount || sync->bInUsed)
1336             {
1337                 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1338                 syncParams.GpuContext = m_renderContext;
1339                 syncParams.presSyncResource = &sync->resSyncObject;
1340                 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1341                 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1342             }
1343         }
1344         m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1345     }
1346 
1347     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1348     {
1349         m_osInterface->pfnFreeResource(m_osInterface, &m_resVdBoxSemaphoreMem[i].sResource);
1350     }
1351 
1352     for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resPipeStartSemaMem); i++)
1353     {
1354         m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem[i].sResource);
1355     }
1356 
1357    if (m_enableTileStitchByHW)
1358     {
1359         for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1360         {
1361             for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1362             {
1363                 // HuC stitching Data buffer
1364                 m_osInterface->pfnFreeResource(
1365                     m_osInterface,
1366                     &m_resHucStitchDataBuffer[i][j]);
1367             }
1368         }
1369         //Second level BB for huc stitching cmd
1370         Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
1371     }
1372 
1373     Mhw_FreeBb(m_osInterface, &m_thirdLevelBatchBuffer, nullptr);
1374     FreeTileLevelBatch();
1375     FreeTileRowLevelBRCBatch();
1376 
1377     m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcPakSemaphoreMem.sResource);
1378 
1379     return CodechalVdencHevcState::FreePakResources();
1380 }
1381 
AllocateEncResources()1382 MOS_STATUS CodechalVdencHevcStateG12::AllocateEncResources()
1383 {
1384     CODECHAL_ENCODE_FUNCTION_ENTER;
1385 
1386     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1387 
1388     //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateEncResources());
1389 
1390     if (m_hmeSupported)
1391     {
1392         HmeParams hmeParams;
1393 
1394         MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1395         hmeParams.b4xMeDistortionBufferSupported = true;
1396         hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer;
1397         hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer;
1398         hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1399         hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer;
1400         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources4xME(&hmeParams));
1401         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources16xME(&hmeParams));
1402         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources32xME(&hmeParams));
1403     }
1404 
1405     // VDENC tile row store buffer
1406     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1407     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1408     allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
1409     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1410     allocParamsForBufferLinear.Format   = Format_Buffer;
1411     allocParamsForBufferLinear.dwBytes  = MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
1412     allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
1413 
1414     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1415             m_osInterface,
1416             &allocParamsForBufferLinear,
1417             &m_vdencTileRowStoreBuffer),
1418             "Failed to allocate VDENC Tile Row Store Buffer");
1419 
1420     MOS_ALLOC_GFXRES_PARAMS allocParamsForSurface;
1421     MOS_ZeroMemory(&allocParamsForSurface, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1422     allocParamsForSurface.Type = MOS_GFXRES_BUFFER;
1423     allocParamsForSurface.TileType = MOS_TILE_LINEAR;
1424     allocParamsForSurface.Format = Format_Buffer;
1425     allocParamsForSurface.dwBytes = m_numLcu * 4;
1426     allocParamsForSurface.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
1427 
1428     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1429             m_osInterface,
1430             &allocParamsForSurface,
1431             &m_vdencCumulativeCuCountStreamoutSurface),
1432             "Failed to allocate VDEnc Cumulative CU Count Streamout Surface");
1433 
1434     // Move from CodechalVdencHevcState::AllocateEncResources()
1435 
1436     // PAK stream-out buffer
1437     allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_PAK_STREAMOUT_SIZE;
1438     allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer";
1439     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1440         m_osInterface,
1441         &allocParamsForBufferLinear,
1442         &m_resStreamOutBuffer[0]),
1443         "Failed to allocate Pak Stream Out Buffer.");
1444 
1445     // VDENC Intra Row Store Scratch buffer
1446     // 1 cacheline per MB
1447     //  Double the size for Tile Replay
1448     uint32_t size = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * CODECHAL_CACHELINE_SIZE * 2 * 2;
1449     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
1450         m_standard, size, 1, vdencIntraRowStoreScratch, "vdencIntraRowStoreScratch"));
1451 
1452     // VDENC Statistics buffer
1453     // Enabled for BRC only
1454     size = MOS_ALIGN_CEIL(m_vdencBrcStatsBufferSize * m_maxTileNumber, CODECHAL_PAGE_SIZE);
1455     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
1456         m_standard, size, 1, vdencStats, "vdencStats"));
1457 
1458     // end of CodechalVdencHevcState::AllocateEncResources()
1459 
1460     if (m_enableSCC)
1461     {
1462         MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
1463         MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
1464         uint32_t                alignedWidth, alignedHeight;
1465 
1466         // Allocate the recon not filtered surface for IBC
1467         // First align to LCU size 64x64
1468         alignedWidth  = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
1469         alignedHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
1470 
1471         MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1472         allocParamsForBuffer2D.Type     = MOS_GFXRES_2D;
1473         allocParamsForBuffer2D.TileType = MOS_TILE_Y;
1474         // default setting
1475         allocParamsForBuffer2D.Format   = Format_NV12;
1476         allocParamsForBuffer2D.pBufName = "Recon not Filtered Surface";
1477         allocParamsForBuffer2D.dwWidth  = alignedWidth;
1478         allocParamsForBuffer2D.dwHeight = alignedHeight;
1479 
1480         // The format and size is dependent on chroma format and bit depth
1481         CODECHAL_ENCODE_ASSERT(m_bitDepth < 12);
1482 
1483         if (HCP_CHROMA_FORMAT_YUV420 == m_chromaFormat)
1484         {
1485             if (10 == m_bitDepth)
1486             {
1487                 if (m_mmcState && m_mmcState->IsMmcEnabled())
1488                 {
1489                     allocParamsForBuffer2D.dwWidth = alignedWidth * 2;
1490                 }
1491                 else
1492                 {
1493                     allocParamsForBuffer2D.Format = Format_P010;
1494                 }
1495             }
1496         }
1497         else if (HCP_CHROMA_FORMAT_YUV444 == m_chromaFormat)
1498         {
1499             if (8 == m_bitDepth)
1500             {
1501                 allocParamsForBuffer2D.Format   = Format_AYUV;
1502                 allocParamsForBuffer2D.dwWidth  = alignedWidth >> 2;
1503                 allocParamsForBuffer2D.dwHeight = alignedHeight * 3;
1504             }
1505             else
1506             {
1507                 allocParamsForBuffer2D.Format   = Format_Y410;
1508                 allocParamsForBuffer2D.dwWidth  = alignedWidth >> 1;
1509                 allocParamsForBuffer2D.dwHeight = alignedHeight * 3;
1510             }
1511         }
1512         else
1513         {
1514             CODECHAL_ENCODE_ASSERTMESSAGE("4:2:2 is not supported for SCC feature!");
1515             eStatus = MOS_STATUS_INVALID_PARAMETER;
1516             return eStatus;
1517         }
1518 
1519         if (m_mmcState && m_mmcState->IsMmcEnabled())
1520         {
1521             allocParamsForBuffer2D.bIsCompressible = true;
1522             allocParamsForBuffer2D.CompressionMode = MOS_MMC_MC;
1523         }
1524         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1525                                                       m_osInterface,
1526                                                       &allocParamsForBuffer2D,
1527                                                       &m_vdencRecNotFilteredBuffer),
1528             "Failed to allocate Recon not filtered surface for IBC");
1529     }
1530 
1531     return eStatus;
1532 }
1533 
FreeEncResources()1534 MOS_STATUS CodechalVdencHevcStateG12::FreeEncResources()
1535 {
1536     CODECHAL_ENCODE_FUNCTION_ENTER;
1537 
1538     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencTileRowStoreBuffer);
1539     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencCumulativeCuCountStreamoutSurface);
1540 
1541     // Free ME resources
1542     HmeParams hmeParams;
1543 
1544     MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1545     hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer;
1546     hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer;
1547     hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1548     hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer;
1549     DestroyMEResources(&hmeParams);
1550 
1551     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencRecNotFilteredBuffer);
1552 
1553     return CodechalVdencHevcState::FreeEncResources();
1554 }
1555 
AllocateBrcResources()1556 MOS_STATUS CodechalVdencHevcStateG12::AllocateBrcResources()
1557 {
1558     CODECHAL_ENCODE_FUNCTION_ENTER;
1559     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1560 
1561     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateBrcResources());
1562 
1563     uint32_t* data = nullptr;
1564     MOS_LOCK_PARAMS lockFlagsWriteOnly;
1565     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1566     lockFlagsWriteOnly.WriteOnly = 1;
1567 
1568     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1569     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1570     allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
1571     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1572     allocParamsForBufferLinear.Format   = Format_Buffer;
1573     allocParamsForBufferLinear.dwBytes  = sizeof(uint32_t);
1574     allocParamsForBufferLinear.pBufName = "TileRowBRCSyncSemaphore";
1575 
1576     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1577                                                   m_osInterface,
1578                                                   &allocParamsForBufferLinear,
1579                                                   &m_resTileRowBRCsyncSemaphore),
1580         "Failed to create Tile Row BRC sync Semaphore Memory.");
1581 
1582     CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1583                                         m_osInterface,
1584                                         &m_resTileRowBRCsyncSemaphore,
1585                                         &lockFlagsWriteOnly));
1586 
1587     *data = 0;
1588 
1589     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1590         m_osInterface,
1591         &m_resTileRowBRCsyncSemaphore));
1592 
1593     return eStatus;
1594 }
1595 
FreeBrcResources()1596 MOS_STATUS CodechalVdencHevcStateG12::FreeBrcResources()
1597 {
1598     CODECHAL_ENCODE_FUNCTION_ENTER;
1599 
1600     m_osInterface->pfnFreeResource(m_osInterface, &m_resTileRowBRCsyncSemaphore);
1601     return CodechalVdencHevcState::FreeBrcResources();
1602 }
1603 
AllocateTileLevelBatch()1604 MOS_STATUS CodechalVdencHevcStateG12::AllocateTileLevelBatch()
1605 {
1606     CODECHAL_ENCODE_FUNCTION_ENTER;
1607 
1608     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1609 
1610     // Only allocate when the number of tile changed
1611     if (m_numTileBatchAllocated >= m_numTiles)
1612     {
1613         return eStatus;
1614     }
1615 
1616     // Make it simple, free first if need reallocate
1617     if (m_numTileBatchAllocated > 0)
1618     {
1619         CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeTileLevelBatch());
1620     }
1621 
1622     // First allocate the batch buffer array
1623     for (int32_t idx = 0; idx < CODECHAL_VDENC_BRC_NUM_OF_PASSES; idx++)
1624     {
1625         if (m_tileLevelBatchBuffer[idx] == nullptr)
1626         {
1627             m_tileLevelBatchBuffer[idx] = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(sizeof(MHW_BATCH_BUFFER) * m_numTiles);
1628 
1629             if (nullptr == m_tileLevelBatchBuffer[idx])
1630             {
1631                 CODECHAL_ENCODE_ASSERTMESSAGE("Allocate memory for tile batch buffer failed");
1632                 return MOS_STATUS_NO_SPACE;
1633             }
1634         }
1635 
1636         // Allocate the batch buffer for each tile
1637         uint32_t  i = 0;
1638         for (i = 0; i < m_numTiles; i++)
1639         {
1640             MOS_ZeroMemory(&m_tileLevelBatchBuffer[idx][i], sizeof(MHW_BATCH_BUFFER));
1641             m_tileLevelBatchBuffer[idx][i].bSecondLevel = true;
1642             CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1643                 m_osInterface,
1644                 &m_tileLevelBatchBuffer[idx][i],
1645                 nullptr,
1646                 m_tileLevelBatchSize));
1647         }
1648     }
1649 
1650     // Record the number of allocated batch buffer for tiles
1651     m_numTileBatchAllocated = m_numTiles;
1652     return eStatus;
1653 }
1654 
FreeTileLevelBatch()1655 MOS_STATUS CodechalVdencHevcStateG12::FreeTileLevelBatch()
1656 {
1657     CODECHAL_ENCODE_FUNCTION_ENTER;
1658 
1659     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1660 
1661     // Free the batch buffer for each tile
1662     uint32_t  i = 0;
1663     uint32_t  j = 0;
1664     for (i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1665     {
1666         for (j = 0; j < m_numTileBatchAllocated; j++)
1667         {
1668             CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_tileLevelBatchBuffer[i][j], nullptr));
1669         }
1670 
1671         MOS_FreeMemory(m_tileLevelBatchBuffer[i]);
1672         m_tileLevelBatchBuffer[i] = nullptr;
1673     }
1674 
1675     // Reset the number of tile batch allocated
1676     m_numTileBatchAllocated = 0;
1677 
1678     return eStatus;
1679 }
1680 
AllocateTileRowLevelBRCBatch()1681 MOS_STATUS CodechalVdencHevcStateG12::AllocateTileRowLevelBRCBatch()
1682 {
1683     CODECHAL_ENCODE_FUNCTION_ENTER;
1684 
1685     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1686 
1687     // Only allocate when the number of tile row changed
1688     if (m_numTileRowBRCBatchAllocated >= m_numTileRows)
1689     {
1690         return eStatus;
1691     }
1692 
1693     // Make it simple, free first if need reallocate
1694     if (m_numTileRowBRCBatchAllocated > 0)
1695     {
1696         CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeTileRowLevelBRCBatch());
1697     }
1698 
1699     // First allocate the batch buffer array
1700     for (int32_t idx = 0; idx < CODECHAL_VDENC_BRC_NUM_OF_PASSES; idx++)
1701     {
1702         if (m_TileRowBRCBatchBuffer[idx] == nullptr)
1703         {
1704             m_TileRowBRCBatchBuffer[idx] = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(sizeof(MHW_BATCH_BUFFER) * m_numTileRows);
1705 
1706             if (nullptr == m_TileRowBRCBatchBuffer[idx])
1707             {
1708                 CODECHAL_ENCODE_ASSERTMESSAGE("Allocate memory for tile row level BRC batch buffer failed");
1709                 return MOS_STATUS_NO_SPACE;
1710             }
1711         }
1712 
1713         // Allocate the batch buffer for each tile row
1714         uint32_t  i = 0;
1715         for (i = 0; i < m_numTileRows; i++)
1716         {
1717             MOS_ZeroMemory(&m_TileRowBRCBatchBuffer[idx][i], sizeof(MHW_BATCH_BUFFER));
1718             m_TileRowBRCBatchBuffer[idx][i].bSecondLevel = true;
1719             CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1720                 m_osInterface,
1721                 &m_TileRowBRCBatchBuffer[idx][i],
1722                 nullptr,
1723                 m_hwInterface->m_hucCommandBufferSize));
1724         }
1725     }
1726 
1727     // Record the number of allocated batch buffer for tiles
1728     m_numTileRowBRCBatchAllocated = m_numTileRows;
1729     return eStatus;
1730 }
1731 
FreeTileRowLevelBRCBatch()1732 MOS_STATUS CodechalVdencHevcStateG12::FreeTileRowLevelBRCBatch()
1733 {
1734     CODECHAL_ENCODE_FUNCTION_ENTER;
1735 
1736     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1737 
1738     // Free the batch buffer for each tile row
1739     uint32_t  i = 0;
1740     uint32_t  j = 0;
1741     for (i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1742     {
1743         for (j = 0; j < m_numTileRowBRCBatchAllocated; j++)
1744         {
1745             CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_TileRowBRCBatchBuffer[i][j], nullptr));
1746         }
1747 
1748         MOS_FreeMemory(m_TileRowBRCBatchBuffer[i]);
1749         m_TileRowBRCBatchBuffer[i] = nullptr;
1750     }
1751 
1752     // Reset the number of tile row BRC batch allocated
1753     m_numTileRowBRCBatchAllocated = 0;
1754 
1755     return eStatus;
1756 }
1757 
InitializePicture(const EncoderParams & params)1758 MOS_STATUS CodechalVdencHevcStateG12::InitializePicture(const EncoderParams& params)
1759 {
1760     CODECHAL_ENCODE_FUNCTION_ENTER;
1761 
1762     m_numNAL = params.uiNumNalUnits;
1763     m_overallNALPayload = params.uiOverallNALPayload;
1764 
1765     // common initilization
1766     return CodechalVdencHevcState::InitializePicture(params);
1767 }
1768 
SetSequenceStructs()1769 MOS_STATUS CodechalVdencHevcStateG12::SetSequenceStructs()
1770 {
1771     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1772 
1773     CODECHAL_ENCODE_FUNCTION_ENTER;
1774 
1775     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1776     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1777     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1778     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1779     allocParamsForBufferLinear.Format = Format_Buffer;
1780     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
1781     allocParamsForBufferLinear.pBufName = "VDENC Read Batch Buffer";
1782 
1783     uint32_t batchBufferSize = m_hwInterface->m_vdencReadBatchBufferSize +
1784                                ENCODE_HEVC_VDENC_NUM_MAX_SLICES * (m_numNAL * mhw_vdbox_hcp_g12_X::HCP_PAK_INSERT_OBJECT_CMD::byteSize + m_overallNALPayload);
1785 
1786     if (batchBufferSize > allocParamsForBufferLinear.dwBytes && allocParamsForBufferLinear.dwBytes != m_prevVdencReadBatchBufferSize)
1787     {
1788         m_hwInterface->m_vdencReadBatchBufferSize = batchBufferSize;
1789         m_hwInterface->m_vdenc2ndLevelBatchBufferSize = batchBufferSize;
1790         m_tileLevelBatchSize = batchBufferSize;
1791         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
1792         m_prevVdencReadBatchBufferSize = allocParamsForBufferLinear.dwBytes;
1793 
1794         for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1795         {
1796             for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1797             {
1798                 if (!Mos_ResourceIsNull(&m_vdencReadBatchBuffer[k][i]))
1799                 {
1800                     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencReadBatchBuffer[k][i]);
1801                 }
1802             }
1803 
1804             for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1805             {
1806                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1807                                                               m_osInterface,
1808                                                               &allocParamsForBufferLinear,
1809                                                               &m_vdencReadBatchBuffer[k][i]),
1810                     "Failed to allocate VDENC Read Batch Buffer");
1811             }
1812         }
1813     }
1814 
1815     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetSequenceStructs());
1816 
1817     return eStatus;
1818 }
1819 
SetPictureStructs()1820 MOS_STATUS CodechalVdencHevcStateG12::SetPictureStructs()
1821 {
1822     CODECHAL_ENCODE_FUNCTION_ENTER;
1823 
1824     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1825 
1826     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetPictureStructs());
1827 
1828     if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
1829         (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
1830     {
1831         if (Format_YUY2 != m_reconSurface.Format)
1832         {
1833             eStatus = MOS_STATUS_INVALID_PARAMETER;
1834             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface format is not correct!");
1835         }
1836         else if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
1837             m_reconSurface.dwWidth < m_oriFrameWidth / 2)
1838         {
1839             eStatus = MOS_STATUS_INVALID_PARAMETER;
1840             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface allocation size is not correct!");
1841         }
1842         else
1843         {
1844             // update Recon surface to Variant format
1845             CodechalEncodeHevcBase::UpdateYUY2SurfaceInfo(&m_reconSurface, m_is10BitHevc);
1846         }
1847     }
1848 
1849     // Frame level BRC pass set to one pass when tile replay is enabled
1850     if (m_enableTileReplay)
1851     {
1852         m_numPasses = 0;
1853     }
1854 
1855     // Error concealment, disable IBC if slice coding type is I type
1856     if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
1857     {
1858         for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
1859         {
1860             if (m_hevcSliceParams[slcCount].slice_type == CODECHAL_ENCODE_HEVC_I_SLICE)
1861             {
1862                 m_hevcPicParams->pps_curr_pic_ref_enabled_flag = false;
1863                 break;
1864             }
1865         }
1866     }
1867 
1868     // EOS is not working on GEN12, disable it by setting below to false (WA)
1869     m_lastPicInSeq = false;
1870     m_lastPicInStream = false;
1871     return eStatus;
1872 }
1873 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1874 MOS_STATUS CodechalVdencHevcStateG12::GetStatusReport(
1875     EncodeStatus *encodeStatus,
1876     EncodeStatusReport *encodeStatusReport)
1877 {
1878     CODECHAL_ENCODE_FUNCTION_ENTER;
1879 
1880     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1881 
1882     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1883     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1884 
1885     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpVdencOutputs()));
1886 
1887     // When tile replay is enabled with tile replay, need to report out the tile size and the bit stream is not continous
1888     if ((encodeStatusReport->UsedVdBoxNumber == 1) && (!m_enableTileReplay || (m_enableTileReplay && encodeStatusReport->NumberTilesInFrame == 1)))
1889     {
1890         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport));
1891         return eStatus;
1892     }
1893 
1894     // Allocate the tile size report memory
1895     encodeStatusReport->SizeOfTileInfoBuffer = encodeStatusReport->NumberTilesInFrame * sizeof(CodechalTileInfo);
1896     if (encodeStatusReport->pHEVCTileinfo)
1897     {
1898         MOS_FreeMemory(encodeStatusReport->pHEVCTileinfo);
1899         encodeStatusReport->pHEVCTileinfo = nullptr;
1900     }
1901     encodeStatusReport->pHEVCTileinfo = (CodechalTileInfo *)MOS_AllocAndZeroMemory(encodeStatusReport->SizeOfTileInfoBuffer);
1902     CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport->pHEVCTileinfo);
1903 
1904     // In case of CQP, PAK integration kernel is not called, so used tile size record from HW
1905     // PAK integration kernel does not handle stitching for single pipe mode
1906     PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1907     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[encodeStatusReport->CurrOriginalPic.FrameIdx];
1908 
1909     MOS_LOCK_PARAMS lockFlags;
1910     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1911     lockFlags.ReadOnly = 1;
1912     HCPPakHWTileSizeRecord_G12* tileStatusReport = (HCPPakHWTileSizeRecord_G12*)m_osInterface->pfnLockResource(
1913         m_osInterface,
1914         &tileSizeStatusReport->sResource,
1915         &lockFlags);
1916     CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1917 
1918     encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1919     encodeStatusReport->PanicMode = false;
1920     encodeStatusReport->AverageQp = 0;
1921     encodeStatusReport->QpY = 0;
1922     encodeStatusReport->SuggestedQpYDelta = 0;
1923     encodeStatusReport->NumberPasses = 1;
1924     encodeStatusReport->bitstreamSize = 0;
1925     encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1926     encodeStatusReport->NumberSlices = 0;
1927 
1928     uint32_t* sliceSize = nullptr;
1929 
1930     // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
1931     if (encodeStatus->sliceReport.pSliceSize)
1932     {
1933         sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
1934         CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
1935     }
1936 
1937     uint32_t totalCU = 0;
1938     uint32_t sliceCount = 0;
1939     double sumQp = 0.0;
1940     for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1941     {
1942         if (tileStatusReport[i].Length == 0)
1943         {
1944             encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1945             return eStatus;
1946         }
1947         //update tile info with HW counter
1948         if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
1949         {
1950             if (Mos_ResourceIsNull(&m_resHwCountTileReplay))
1951             {
1952                 CODECHAL_ENCODE_ASSERTMESSAGE("m_resHwCountTileReplay is not allocated");
1953                 return MOS_STATUS_NULL_POINTER;
1954             }
1955 
1956             MOS_LOCK_PARAMS LockFlagsNoOverWrite;
1957             MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
1958             LockFlagsNoOverWrite.WriteOnly = 1;
1959             LockFlagsNoOverWrite.NoOverWrite = 1;
1960 
1961             uint8_t* dataHWCountTileReplay = (uint8_t*)m_osInterface->pfnLockResource(
1962                                                        m_osInterface,
1963                                                        &m_resHwCountTileReplay,
1964                                                        &LockFlagsNoOverWrite);
1965 
1966             CODECHAL_ENCODE_CHK_NULL_RETURN(dataHWCountTileReplay);
1967             uint64_t *pAddress2Counter = (uint64_t *)(dataHWCountTileReplay + i * sizeof(HwCounter));
1968             encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count = *pAddress2Counter;
1969             encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count = SwapEndianness(encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count); //Report back in Big endian
1970             encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV = *(++pAddress2Counter);
1971             encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV = SwapEndianness(encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV); //Report back in Big endian
1972             CODECHAL_ENCODE_NORMALMESSAGE("tile = %d, hwCounterValue.Count = 0x%llx, hwCounterValue.IV = 0x%llx", i, encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count, encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV);
1973             if (dataHWCountTileReplay)
1974             {
1975                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHwCountTileReplay);
1976             }
1977         }
1978         encodeStatusReport->pHEVCTileinfo[i].TileSizeInBytes     = tileStatusReport[i].Length;
1979         // The offset only valid if there is no stream stitching
1980         encodeStatusReport->pHEVCTileinfo[i].TileBitStreamOffset = tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1981         encodeStatusReport->pHEVCTileinfo[i].TileRowNum = i / tileParams[i].NumOfTileColumnsInFrame;
1982         encodeStatusReport->pHEVCTileinfo[i].TileColNum = i % tileParams[i].NumOfTileColumnsInFrame;
1983         encodeStatusReport->NumTileReported =  i + 1;
1984         encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1985         totalCU += (tileParams[i].TileHeightInMinCbMinus1 + 1) * (tileParams[i].TileWidthInMinCbMinus1 + 1);
1986         sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1987 
1988         if (sliceSize)
1989         {
1990             encodeStatusReport->pSliceSizes = (uint16_t*)sliceSize;
1991             encodeStatusReport->NumberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile;
1992             uint16_t prevCumulativeSliceSize = 0;
1993             // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
1994             for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++)
1995             {
1996                 // PAK output the sliceSize at 16DW intervals.
1997                 CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
1998 
1999                 //convert cummulative slice size to individual, first slice may have PPS/SPS,
2000                 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
2001                 encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
2002                 prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
2003                 sliceCount++;
2004             }
2005         }
2006     }
2007 
2008     if (sliceSize)
2009     {
2010         encodeStatusReport->SizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatusReport->NumberSlices;
2011         encodeStatusReport->SliceSizeOverflow = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
2012         m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
2013     }
2014 
2015     CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
2016 
2017     if (encodeStatusReport->bitstreamSize == 0 ||
2018         encodeStatusReport->bitstreamSize >m_bitstreamUpperBound)
2019     {
2020         encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
2021         encodeStatusReport->bitstreamSize = 0;
2022         return MOS_STATUS_INVALID_FILE_SIZE;
2023     }
2024 
2025     if (totalCU != 0)
2026     {
2027         encodeStatusReport->QpY = encodeStatusReport->AverageQp =
2028             (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
2029     }
2030     else
2031     {
2032         return MOS_STATUS_INVALID_PARAMETER;
2033     }
2034 
2035     if (m_enableTileStitchByHW)
2036     {
2037         if (tileStatusReport)
2038         {
2039             // clean-up the tile status report buffer
2040             MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
2041             m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
2042         }
2043         return eStatus;
2044     }
2045 
2046     //Driver stitching is not allowed for secure encode case
2047     if (!m_osInterface->osCpInterface->IsCpEnabled())
2048     {
2049         uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
2050         tempBsBuffer = bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
2051         CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
2052 
2053         PCODEC_REF_LIST currRefList = encodeStatus->encodeStatusReport.pCurrRefList;
2054         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2055         lockFlags.ReadOnly = 1;
2056         uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
2057             m_osInterface,
2058             &currRefList->resBitstreamBuffer,
2059             &lockFlags);
2060 
2061         if (bitstream == nullptr)
2062         {
2063             MOS_SafeFreeMemory(tempBsBuffer);
2064             return MOS_STATUS_NULL_POINTER;
2065         }
2066 
2067         for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2068         {
2069             uint32_t offset = tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
2070             uint32_t len = tileStatusReport[i].Length;
2071 
2072             MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
2073             bufPtr += len;
2074         }
2075 
2076         MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
2077         MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize], m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
2078 
2079         if (bitstream)
2080         {
2081             m_osInterface->pfnUnlockResource(m_osInterface, &currRefList->resBitstreamBuffer);
2082         }
2083 
2084         MOS_FreeMemory(tempBsBuffer);
2085     }
2086 
2087     if (tileStatusReport)
2088     {
2089         // clean-up the tile status report buffer
2090         MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
2091 
2092         m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
2093     }
2094 
2095     return eStatus;
2096 }
2097 
ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)2098 MOS_STATUS CodechalVdencHevcStateG12::ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)
2099 {
2100     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2101     bool isRandomAccess = false;
2102 
2103     CODECHAL_ENCODE_CHK_NULL_RETURN(slcParams);
2104 
2105     if (slcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
2106     {
2107         if (slcParams->num_ref_idx_l0_active_minus1 != slcParams->num_ref_idx_l1_active_minus1)
2108         {
2109             isRandomAccess = true;
2110         }
2111 
2112         for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
2113         {
2114             if (slcParams->RefPicList[0][j].PicEntry != slcParams->RefPicList[1][j].PicEntry)
2115             {
2116                 isRandomAccess = true;
2117             }
2118         }
2119     }
2120 
2121     if (isRandomAccess)
2122     {
2123         if (m_hevcPicParams->bEnableRollingIntraRefresh)
2124         {
2125             CODECHAL_ENCODE_ASSERT(false);
2126             eStatus = MOS_STATUS_INVALID_PARAMETER;
2127         }
2128     }
2129 
2130     if (isRandomAccess && m_enableSCC)
2131     {
2132         CODECHAL_ENCODE_ASSERT(false);
2133         CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
2134     }
2135 
2136     uint8_t maxNumRef0 = isRandomAccess ? 2 : m_numMaxVdencL0Ref;
2137     uint8_t maxNumRef1 = isRandomAccess ? 1 : m_numMaxVdencL1Ref;
2138 
2139     if (slcParams->num_ref_idx_l0_active_minus1 > maxNumRef0 - 1)
2140     {
2141         CODECHAL_ENCODE_ASSERT(false);
2142         slcParams->num_ref_idx_l0_active_minus1 = maxNumRef0 - 1;
2143     }
2144 
2145     if (slcParams->num_ref_idx_l1_active_minus1 > maxNumRef1 - 1)
2146     {
2147         CODECHAL_ENCODE_ASSERT(false);
2148         slcParams->num_ref_idx_l1_active_minus1 = maxNumRef1 - 1;
2149     }
2150 
2151     return eStatus;
2152 }
2153 
UserFeatureKeyReport()2154 MOS_STATUS CodechalVdencHevcStateG12::UserFeatureKeyReport()
2155 {
2156     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2157 
2158     CODECHAL_ENCODE_FUNCTION_ENTER;
2159 
2160     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::UserFeatureKeyReport());
2161 
2162 #if (_DEBUG || _RELEASE_INTERNAL)
2163     CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
2164     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
2165     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
2166 #endif
2167     return eStatus;
2168 }
2169 
//!
//! \brief    Run the pre-encode kernels (CSC/downscaling and HME) for the current frame
//! \details  All work is compiled in only when ENABLE_KERNELS is defined and
//!           _FULL_OPEN_SOURCE is not; otherwise the function just returns success.
//!           When compiled in, it:
//!             - dumps the raw input and the active L0/L1 reference surfaces (debug tool only),
//!             - runs the CSC/downscaling kernels when m_16xMeSupported is set,
//!             - runs the ME kernels (32x if enabled, then 16x, then 4x) when m_b16XMeEnabled is set,
//!             - dumps the ME outputs and the VDEnc stream-in buffer (debug tool only).
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else the status of the first failing call
//!
MOS_STATUS CodechalVdencHevcStateG12::EncodeKernelFunctions()
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
    // Debug only: dump the raw surface that is about to be encoded
    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
        m_rawSurfaceToEnc,
        CodechalDbgAttr::attrEncodeRawInputSurface,
        "SrcSurf")));

    // Debug only: dump every valid active reference surface of list 0, and of
    // list 1 when the frame is not low delay. The dump name carries the low
    // 16 bits of the reference's iFieldOrderCnt[0].
    CODECHAL_DEBUG_TOOL(
        PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
        for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
        {
            CODEC_PICTURE refPic = l0RefFrameList[refIdx];

            if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
            {
                // L0 references
                uint8_t refPicIdx            = m_picIdx[refPic.FrameIdx].ucPicIdx;
                m_debugInterface->m_refIndex = (uint16_t)m_refList[refPicIdx]->iFieldOrderCnt[0];
                std::string refSurfName      = "RefSurf_List0_POC" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
                    &m_refList[refPicIdx]->sRefBuffer,
                    CodechalDbgAttr::attrReferenceSurfaces,
                    refSurfName.data()))
            }
        }

        if (!m_lowDelay)
        {
            PCODEC_PICTURE l1RefFrameList = m_hevcSliceParams->RefPicList[LIST_1];
            for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
            {
                CODEC_PICTURE refPic = l1RefFrameList[refIdx];

                if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
                {
                    // L1 references
                    uint8_t refPicIdx            = m_picIdx[refPic.FrameIdx].ucPicIdx;
                    m_debugInterface->m_refIndex = (uint16_t)m_refList[refPicIdx]->iFieldOrderCnt[0];
                    std::string refSurfName      = "RefSurf_List1_POC" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
                    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
                        &m_refList[refPicIdx]->sRefBuffer,
                        CodechalDbgAttr::attrReferenceSurfaces,
                        refSurfName.data()))
                }
            }
        });

    // NOTE(review): the saved value is only restored on the normal path further
    // down; an early return from one of the status checks in between (assuming
    // those macros return on failure - confirm) leaves the member overwritten.
    auto singleTaskPhaseSupported = m_singleTaskPhaseSupported;    // local variable to save current setting before overwriting

    if (m_16xMeSupported)
    {
        // Enable SingleTaskPhase for now with SHME
        m_singleTaskPhaseSupported = true;
        m_maxBtCount = GetMaxBtCount();

        CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
        MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));

        // CSC and 4x downscaling are never marked as the last task in the phase.
        // 16x downscaling is marked last only when neither 32x ME is supported
        // nor HME is enabled; 32x downscaling only when HME is disabled.
        cscScalingKernelParams.bLastTaskInPhaseCSC  =
        cscScalingKernelParams.bLastTaskInPhase4xDS = false;
        cscScalingKernelParams.bLastTaskInPhase16xDS    = !(m_32xMeSupported || m_hmeEnabled);
        cscScalingKernelParams.bLastTaskInPhase32xDS    = !m_hmeEnabled;

        m_firstTaskInPhase = true;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->SetHevcCscFlagAndRawColor());
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));

        // Debug only: dump the downscaled surfaces of the current tracked buffer
        // (the 32x surface only when 32x ME is enabled)
        CODECHAL_DEBUG_TOOL(
            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
                m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
                CodechalDbgAttr::attrReconstructedSurface,
                "4x_Scaled_Surf"));

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
                m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER),
                CodechalDbgAttr::attrReconstructedSurface,
                "16x_Scaled_Surf"));

            if (m_b32XMeEnabled)
            {
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
                    m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER),
                    CodechalDbgAttr::attrReconstructedSurface,
                    "32x_Scaled_Surf"));
            }
        )
    }

    // ME kernels run from the coarsest level to the finest; only the final 4x
    // call is flagged as the last task in the phase.
    if (m_b16XMeEnabled)
    {
        if (m_b32XMeEnabled)
        {
            //HME_P kernel for 32xME
            m_lastTaskInPhase = false;
            CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_32x));
        }

        //HME_P kernel for 16xME
        m_lastTaskInPhase = false;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_16x));

        //StreamIn kernel, 4xME
        m_lastTaskInPhase = true;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x));
    }

    // retrieve SingleTaskPhase setting (SAO will need STP enabled setting)
    m_singleTaskPhaseSupported = singleTaskPhaseSupported;

    // Debug only: dump the ME results when HME is enabled. For each dump the
    // offset argument is non-zero only for bottom-field pictures.
    CODECHAL_DEBUG_TOOL(
        if (m_hmeEnabled) {
            CODECHAL_ME_OUTPUT_PARAMS meOutputParams;

            // 4x ME: MV data and distortion buffers
            MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
            meOutputParams.psMeMvBuffer            = &m_s4XMeMvDataBuffer;
            meOutputParams.psMeBrcDistortionBuffer = nullptr;
            meOutputParams.psMeDistortionBuffer    = &m_s4XMeDistortionBuffer;
            meOutputParams.b16xMeInUse = false;
            meOutputParams.b32xMeInUse = false;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                &meOutputParams.psMeMvBuffer->OsResource,
                CodechalDbgAttr::attrOutput,
                "MvData",
                meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
                CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
                CODECHAL_MEDIA_STATE_4X_ME));

            //CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            //    &meOutputParams.psMeBrcDistortionBuffer->OsResource,
            //    CodechalDbgAttr::attrOutput,
            //    "BrcDist",
            //    meOutputParams.psMeBrcDistortionBuffer->dwHeight *meOutputParams.psMeBrcDistortionBuffer->dwPitch,
            //    CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4), 8) : 0,
            //    CODECHAL_MEDIA_STATE_4X_ME));
            // The pointer was set to the address of a member just above, so this
            // check always passes here.
            if (meOutputParams.psMeDistortionBuffer)
            {
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                    &meOutputParams.psMeDistortionBuffer->OsResource,
                    CodechalDbgAttr::attrOutput,
                    "MeDist",
                    meOutputParams.psMeDistortionBuffer->dwHeight *meOutputParams.psMeDistortionBuffer->dwPitch,
                    CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
                    CODECHAL_MEDIA_STATE_4X_ME));
            }
            // 16x ME: MV data only
            if (m_b16XMeEnabled)
            {
                MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
                meOutputParams.psMeMvBuffer            = &m_s16XMeMvDataBuffer;
                meOutputParams.psMeBrcDistortionBuffer = nullptr;
                meOutputParams.psMeDistortionBuffer = nullptr;
                meOutputParams.b16xMeInUse = true;
                meOutputParams.b32xMeInUse = false;

                CODECHAL_ENCODE_CHK_STATUS_RETURN(
                    m_debugInterface->DumpBuffer(
                        &meOutputParams.psMeMvBuffer->OsResource,
                        CodechalDbgAttr::attrOutput,
                        "MvData",
                        meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
                        CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
                        CODECHAL_MEDIA_STATE_16X_ME));
            }
            // 32x ME: MV data only
            if (m_b32XMeEnabled)
            {
                MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
                meOutputParams.psMeMvBuffer = &m_s32XMeMvDataBuffer;
                meOutputParams.psMeBrcDistortionBuffer = nullptr;
                meOutputParams.psMeDistortionBuffer = nullptr;
                meOutputParams.b16xMeInUse = false;
                meOutputParams.b32xMeInUse = true;

                CODECHAL_ENCODE_CHK_STATUS_RETURN(
                    m_debugInterface->DumpBuffer(
                        &meOutputParams.psMeMvBuffer->OsResource,
                        CodechalDbgAttr::attrOutput,
                        "MvData",
                        meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
                        CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledFrameFieldHeightInMb32x * 4) : 0,
                        CODECHAL_MEDIA_STATE_32X_ME));
            }

            // NOTE(review): meOutputParams is filled in again here, but the
            // stream-in dump below reads m_resVdencStreamInBuffer directly and
            // nothing after this point reads meOutputParams.
            MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
            meOutputParams.pResVdenStreamInBuffer = &(m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
            meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer;
            meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer;
            meOutputParams.b16xMeInUse = false;
            meOutputParams.bVdencStreamInInUse = true;
            // VDEnc stream-in: dump size is m_picWidthInMb * m_picHeightInMb cache lines
            if (m_vdencStreamInEnabled) {
                CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
                    &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
                    CodechalDbgAttr::attrOutput,
                    "StreaminData",
                    m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE,
                    0,
                    CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN));
            }
        })
#endif

    return eStatus;
}
2377 
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)2378 MOS_STATUS CodechalVdencHevcStateG12::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
2379 {
2380     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2381 
2382     CODECHAL_ENCODE_FUNCTION_ENTER;
2383 
2384     // Use FrameStats buffer if in single pipe mode.
2385     if (m_numPipe == 1)
2386     {
2387         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ReadSliceSize(cmdBuffer));
2388         return eStatus;
2389     }
2390 
2391     // Report slice size to app only when dynamic scaling is enabled
2392     if (!m_hevcSeqParams->SliceSizeControl)
2393     {
2394         return eStatus;
2395     }
2396 
2397     // In multi-tile multi-pipe mode, use PAK integration kernel output
2398     // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
2399     MOS_LOCK_PARAMS lockFlags;
2400     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2401     lockFlags.WriteOnly = true;
2402 
2403     uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2);  // encodeStatus is offset by 2 DWs in the resource
2404     uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(m_numLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
2405 
2406     if (IsFirstPipe())
2407     {
2408         if (IsFirstPass())
2409         {
2410             // Create/ Initialize slice report buffer once per frame, to be used across passes
2411             if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
2412             {
2413                 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2414                 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2415                 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
2416                 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2417                 allocParamsForBufferLinear.Format = Format_Buffer;
2418                 allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
2419 
2420                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
2421                     m_osInterface,
2422                     &allocParamsForBufferLinear,
2423                     &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
2424                     "Failed to create HEVC VDEnc Slice Report Buffer ");
2425             }
2426 
2427             // Clear slice size structure to be sent in EncodeStatusReport buffer
2428             uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
2429             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2430             MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
2431             m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
2432 
2433             // Set slice size pointer in slice size structure
2434             MHW_MI_FLUSH_DW_PARAMS  miFlushDwParams;
2435             MOS_ZeroMemory(&miFlushDwParams, sizeof(miFlushDwParams));
2436             miFlushDwParams.pOsResource      = &m_encodeStatusBuf.resStatusBuffer;
2437             miFlushDwParams.dwResourceOffset = CODECHAL_OFFSETOF(EncodeStatusSliceReport, pSliceSize) + baseOffset + m_encodeStatusBuf.dwSliceReportOffset;
2438             miFlushDwParams.dwDataDW1        = (uint32_t)((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF);
2439             miFlushDwParams.dwDataDW2        = (uint32_t)(((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF00000000) >> 32);
2440             miFlushDwParams.bQWordEnable     = 1;
2441             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
2442                 cmdBuffer,
2443                 &miFlushDwParams));
2444         }
2445 
2446         // Copy Slize size data buffer from PAK to be sent back to App
2447         CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
2448             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
2449             m_hevcTileStatsOffset.uiHevcSliceStreamout,
2450             &m_resSliceReport[m_encodeStatusBuf.wCurrIndex],
2451             0,
2452             sizeOfSliceSizesBuffer));
2453 
2454         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
2455         MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
2456         miCpyMemMemParams.presSrc = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
2457         miCpyMemMemParams.dwSrcOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics;
2458         miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
2459         miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset;     // Slice size overflow is at DW0 EncodeStatusSliceReport
2460         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
2461     }
2462 
2463     return eStatus;
2464 }
2465 
ExecutePictureLevel()2466 MOS_STATUS CodechalVdencHevcStateG12::ExecutePictureLevel()
2467 {
2468     CODECHAL_ENCODE_FUNCTION_ENTER;
2469 
2470     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2471 
2472     int32_t currentPass = GetCurrentPass();
2473     int32_t currentPipe = GetCurrentPipe();
2474 
2475     if (IsFirstPipe() && IsFirstPass())
2476     {
2477         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams[m_virtualEngineBbIndex]));
2478         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
2479         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRoundingValues());
2480     }
2481 
2482     if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
2483     {
2484         if (m_currRefSync == nullptr)
2485         {
2486             m_currRefSync = &m_refSync[m_currMbCodeIdx];
2487         }
2488     }
2489     else
2490     {
2491         m_currRefSync = nullptr;
2492     }
2493 
2494     if (m_lookaheadPass && (m_hevcSeqParams->MaxAdaptiveGopPicSize > 0))
2495     {
2496         bool forceIntra =  m_intraInterval >= m_hevcSeqParams->MaxAdaptiveGopPicSize;
2497         if ((!IsFirstPass() || forceIntra) && (m_hevcPicParams->CodingType != I_TYPE))
2498         {
2499             m_vdencStreamInEnabled = true;
2500         }
2501 
2502         if (!m_lookaheadAdaptiveI)
2503         {
2504             m_intraInterval = forceIntra ? 1 : m_intraInterval + 1;
2505         }
2506     }
2507 
2508     m_firstTaskInPhase = m_singleTaskPhaseSupported ? IsFirstPass() : false;
2509     m_lastTaskInPhase = m_singleTaskPhaseSupported ? IsLastPass() : true;
2510 
2511     // Per frame maximum HuC kernels is 5 - BRC Init, BRC Update, PAK Int, BRC Update, PAK Int
2512     m_hucCommandsSize = m_hwInterface->m_hucCommandBufferSize * 5;
2513 
2514     PerfTagSetting perfTag;
2515     CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
2516 
2517     if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())                                                                         \
2518     {
2519         CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
2520         eStatus = MOS_STATUS_INVALID_PARAMETER;
2521         return eStatus;
2522     }
2523 
2524     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
2525 
2526     if (!m_singleTaskPhaseSupportedInPak)
2527     {
2528         // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
2529         m_firstTaskInPhase = true;
2530         m_lastTaskInPhase = true;
2531     }
2532 
2533     if (m_lookaheadPass)
2534     {
2535         if (m_swLaMode != nullptr)
2536         {
2537             m_lastTaskInPhase = true;
2538         }
2539         else
2540         {
2541             m_lastTaskInPhase = !m_singleTaskPhaseSupported;
2542         }
2543     }
2544     else if (m_swBrcMode != nullptr)
2545     {
2546         m_lastTaskInPhase = !IsFirstPass();
2547     }
2548 
2549     // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
2550     SetPakPassType();
2551 
2552     bool pakOnlyMultipassEnable = false;
2553 
2554     bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (IsLastPass()) && !m_pakOnlyPass;
2555 
2556     uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
2557 
2558     m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
2559         CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
2560 
2561     // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
2562     PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
2563     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
2564     {
2565         CODEC_PICTURE refPic = l0RefFrameList[refIdx];
2566 
2567         if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2568         {
2569             uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2570             m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
2571         }
2572     }
2573 
2574     if (IsFirstPass())
2575     {
2576         MOS_COMMAND_BUFFER cmdBuffer;
2577         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2578         MHW_MI_MMIOREGISTERS mmioRegister;
2579         bool validMmio = m_hwInterface->GetMfxInterface()->ConvertToMiRegister(m_vdboxIndex, mmioRegister);
2580         if (validMmio)
2581         {
2582             HalOcaInterface::On1stLevelBBStart(
2583                 cmdBuffer,
2584                 *m_hwInterface->GetOsInterface()->pOsContext,
2585                 m_hwInterface->GetOsInterface()->CurrentGpuContextHandle,
2586                 *m_hwInterface->GetMiInterface(),
2587                 mmioRegister);
2588         }
2589         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2590     }
2591 
2592     if (m_numPipe >= 2)
2593     {
2594         // Send Cmd Buffer Header for VE in last pipe only
2595         MOS_COMMAND_BUFFER cmdBuffer;
2596         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2597         bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
2598         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2599 
2600         MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
2601         MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
2602         forceWakeupParams.bMFXPowerWellControl = true;
2603         forceWakeupParams.bMFXPowerWellControlMask = true;
2604         forceWakeupParams.bHEVCPowerWellControl = true;
2605         forceWakeupParams.bHEVCPowerWellControlMask = true;
2606         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
2607             &cmdBuffer,
2608             &forceWakeupParams));
2609 
2610         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2611     }
2612     else if (IsFirstPass())
2613     {
2614         // Send force wake command for VDBOX
2615         MOS_COMMAND_BUFFER cmdBuffer;
2616         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2617 
2618         MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
2619         MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
2620         forceWakeupParams.bMFXPowerWellControl = true;
2621         forceWakeupParams.bMFXPowerWellControlMask = true;
2622         forceWakeupParams.bHEVCPowerWellControl = true;
2623         forceWakeupParams.bHEVCPowerWellControlMask = true;
2624         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
2625             &cmdBuffer,
2626             &forceWakeupParams));
2627         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2628     }
2629 
2630     if (m_numPipe >= 2 && IsFirstPass())
2631     {
2632         MOS_COMMAND_BUFFER cmdBuffer;
2633         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2634 
2635         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
2636 
2637         //HW Semaphore cmd to make sure all pipes start encode at the same time
2638         for (uint32_t i = 0; i < m_numPipe; i++)
2639         {
2640             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(
2641                 &m_resPipeStartSemaMem[i].sResource,
2642                 1,
2643                 MHW_MI_ATOMIC_INC,
2644                 &cmdBuffer));
2645         }
2646         auto pipeNum = GetCurrentPipe();
2647         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2648             &m_resPipeStartSemaMem[pipeNum].sResource,
2649             &cmdBuffer,
2650             m_numPipe));
2651 
2652         //clean HW semaphore memory
2653         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(
2654             &m_resPipeStartSemaMem[pipeNum].sResource,
2655             &cmdBuffer,
2656             0x0));
2657 
2658         //Start Watchdog Timer
2659         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
2660 
2661         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2662     }
2663 
2664     if (m_vdencHucUsed && IsFirstPipe())
2665     {
2666         // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2667         uint16_t callType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2668         if (m_singleTaskPhaseSupported)
2669         {
2670             perfTag.CallType = IsFirstPass() ? CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE : CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE_SECOND_PASS;
2671         }
2672         CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, callType);
2673 
2674         m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2675 
2676         // Invoke BRC init/reset FW
2677         if (m_brcInit || m_brcReset)
2678         {
2679             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2680         }
2681 
2682         if (!m_singleTaskPhaseSupported)
2683         {
2684             CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
2685         }
2686 
2687         // Invoke BRC update FW
2688         // When tile replay is enabled, BRC update is also called at tile row level
2689         if (m_enableTileReplay)
2690         {
2691             m_FrameLevelBRCForTileRow = true;
2692             m_TileRowLevelBRC         = false;
2693         }
2694         else
2695         {
2696             m_FrameLevelBRCForTileRow = false;
2697             m_TileRowLevelBRC         = false;
2698         }
2699         CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2700 
2701         m_brcInit = m_brcReset = false;
2702     }
2703 
2704     MOS_COMMAND_BUFFER cmdBuffer;
2705     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2706 
2707     if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) && (m_numPipe == 1))
2708     {
2709         // Send command buffer header at the beginning (OS dependent)
2710         // frame tracking tag is only added in the last command buffer header
2711         bool requestFrameTracking = m_singleTaskPhaseSupported ?
2712             m_firstTaskInPhase :
2713             ((m_lookaheadPass && (!m_swLaMode || (m_currPass < m_numPasses))) ? false : m_lastTaskInPhase);
2714         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2715     }
2716 
2717     // clean-up per VDBOX semaphore memory
2718     if (currentPipe < 0)
2719     {
2720         eStatus = MOS_STATUS_INVALID_PARAMETER;
2721         return eStatus;
2722     }
2723 
2724     // Ensure the previous BRC Update is done, before executing PAK
2725     if (m_vdencHucUsed && (m_numPipe >= 2))
2726     {
2727         int32_t currentPass = GetCurrentPass() + 1;
2728         if (IsFirstPipe())
2729         {
2730             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(
2731                 &m_resBrcPakSemaphoreMem.sResource,
2732                 &cmdBuffer,
2733                 currentPass));
2734         }
2735         else
2736         {
2737             CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2738                 &m_resBrcPakSemaphoreMem.sResource,
2739                 &cmdBuffer,
2740                 currentPass));
2741 
2742             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(
2743                 &m_resBrcPakSemaphoreMem.sResource,
2744                 &cmdBuffer,
2745                 0x0));
2746          }
2747     }
2748 
2749     if ((!IsFirstPass()) && m_vdencHuCConditional2ndPass)
2750     {
2751         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2752 
2753         // Insert conditional batch buffer end
2754         MOS_ZeroMemory(
2755             &miConditionalBatchBufferEndParams,
2756             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2757 
2758         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2759         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2760             &m_resPakMmioBuffer;
2761 
2762         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2763             &cmdBuffer,
2764             &miConditionalBatchBufferEndParams));
2765 
2766         auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2767         CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2768         uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
2769 
2770         // Write back the HCP image control register for RC6 may clean it out
2771         MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2772         MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2773         miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2774         miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2775         miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2776         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2777 
2778         MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2779         MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2780         miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2781         miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2782         miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2783         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2784 
2785         MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2786         miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2787         miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2788         miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2789         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2790     }
2791 
2792     if (IsFirstPass() && m_osInterface->bTagResourceSync)
2793     {
2794         // This is a short term WA to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2795         // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2796         // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2797         // as long as Dec/VP/Enc won't depend on this PAK so soon.
2798 
2799         PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2800 
2801         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2802             m_osInterface,
2803             globalGpuContextSyncTagBuffer));
2804         CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2805 
2806         MHW_MI_STORE_DATA_PARAMS params;
2807         params.pOsResource = globalGpuContextSyncTagBuffer;
2808         params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2809         uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2810         params.dwValue = (value > 0) ? (value - 1) : 0;
2811         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &params));
2812     }
2813 
2814     if (IsFirstPipe())
2815     {
2816         if (IsFirstPass())
2817         {
2818             // Check other dependent VDBOXs if they are ready
2819             // The inter frame sync method was changed, remove this first, to be tuned
2820             // CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForVDBOX(&cmdBuffer));
2821 
2822             // clean-up HW semaphore memory
2823             if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource))
2824             {
2825                 // Ensure this semaphore is not used before. If yes, wait until it is done.
2826                 // The inter frame sync method was changed, remove this first, to be tuned
2827                 // CODECHAL_ENCODE_CHK_STATUS_RETURN(
2828                 //    SendHWWaitCommand(&pCurrRefSync->resSemaphoreMem.sResource, &cmdBuffer, 1));
2829 
2830                 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2831                 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2832                 storeDataParams.pOsResource      = &m_currRefSync->resSemaphoreMem.sResource;
2833                 storeDataParams.dwResourceOffset = 0;
2834                 storeDataParams.dwValue = 0;
2835 
2836                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2837                     &cmdBuffer,
2838                     &storeDataParams));
2839             }
2840         }
2841 
2842         if (!m_lookaheadPass || m_swLaMode)
2843         {
2844             CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2845         }
2846     }
2847 
2848     PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams());
2849     ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
2850 
2851     auto release_func = [&]()
2852     {
2853         m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
2854         pipeModeSelectParams = nullptr;
2855     };
2856 
2857     SetHcpPipeModeSelectParams(*pipeModeSelectParams);
2858 
2859     // HCP_PIPE_SELECT can not be generated by FW in BRC mode for GEN11+
2860     {
2861         MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS  vdencControlStateParams;
2862         MHW_MI_VD_CONTROL_STATE_PARAMS        vdControlStateParams;
2863 
2864         //set up VDENC_CONTROL_STATE command
2865         {
2866             MOS_ZeroMemory(&vdencControlStateParams, sizeof(MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS));
2867             vdencControlStateParams.bVdencInitialization  = true;
2868             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(
2869                 static_cast<MhwVdboxVdencInterfaceG12X*>(m_vdencInterface)->AddVdencControlStateCmd(&cmdBuffer, &vdencControlStateParams), release_func);
2870         }
2871 
2872         //set up VD_CONTROL_STATE command
2873         {
2874             MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2875             vdControlStateParams.initialization = true;
2876             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(
2877                 static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams), release_func);
2878         }
2879 
2880         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), release_func);
2881     }
2882 
2883     MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2884     SetHcpSrcSurfaceParams(srcSurfaceParams);
2885     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams), release_func);
2886 
2887     MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams{};
2888     SetHcpReconSurfaceParams(reconSurfaceParams);
2889 
2890 #ifdef _MMC_SUPPORTED
2891     // Recon P010v MMC state set from RC for compression write
2892     MOS_MEMCOMP_STATE tempMmcState = reconSurfaceParams.mmcState;
2893     if (m_reconSurface.Format == Format_P010 && MmcEnable(tempMmcState))
2894     {
2895         reconSurfaceParams.mmcState = MOS_MEMCOMP_RC;
2896     }
2897 #endif
2898     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams), release_func); //this is for Recon surf cmd set
2899 
2900     MHW_VDBOX_SURFACE_PARAMS refSurfaceParams{};
2901     SetHcpRefSurfaceParams(refSurfaceParams);  //it set MMC state and MMCFormat
2902 
2903     // Add the surface state for reference picture, GEN12 HW change
2904     *m_pipeBufAddrParams = {};
2905     SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2906 
2907 #ifdef _MMC_SUPPORTED
2908     if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
2909     {
2910         refSurfaceParams.mmcSkipMask   = (1 << m_slotForRecNotFiltered); //add this for ref
2911     }
2912 #endif
2913 
2914     if (m_mmcState->IsMmcEnabled())
2915     {
2916 
2917         refSurfaceParams.refsMmcEnable = 0;
2918         refSurfaceParams.refsMmcType   = 0;
2919         refSurfaceParams.dwCompressionFormat = 0;
2920 
2921         //add for B frame support
2922         if (m_pictureCodingType != I_TYPE)
2923         {
2924             for (uint8_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
2925             {
2926                 if (i < CODEC_MAX_NUM_REF_FRAME_HEVC &&
2927                     m_picIdx[i].bValid && m_currUsedRefPic[i])
2928                 {
2929                     uint8_t idx          = m_picIdx[i].ucPicIdx;
2930                     uint8_t frameStoreId = m_refIdxMapping[i];
2931 
2932                     MOS_MEMCOMP_STATE mmcState  = MOS_MEMCOMP_DISABLED;
2933                     ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &mmcState));
2934                     refSurfaceParams.refsMmcEnable |= (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC) ? (1 << frameStoreId) : 0;
2935                     refSurfaceParams.refsMmcType |= (mmcState == MOS_MEMCOMP_RC) ? (1 << frameStoreId) : 0;
2936                     if (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC)
2937                     {
2938                         ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcFormat(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &refSurfaceParams.dwCompressionFormat));
2939                     }
2940                 }
2941             }
2942         }
2943     }
2944 
2945     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &refSurfaceParams), release_func);
2946 
2947     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer), release_func);
2948 
2949     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2950     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2951     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams), release_func);
2952 
2953     MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2954     SetHcpQmStateParams(fqmParams, qmParams);
2955     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams), release_func);
2956     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams), release_func);
2957 
2958     SetVdencPipeModeSelectParams(*pipeModeSelectParams);
2959     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), release_func);
2960 
2961     MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2] = {};
2962     SetVdencSurfaceStateParams(srcSurfaceParams, refSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2963     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams), release_func);
2964     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &refSurfaceParams), release_func);  //  this is for Ref, no mmc related setting
2965     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2), release_func);
2966 
2967     SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2968     m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2969     m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2970 #ifdef _MMC_SUPPORTED
2971     m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
2972 #endif
2973     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams), release_func);
2974 
2975     MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
2976     SetHcpPicStateParams(picStateParams);
2977 
2978     if (m_vdencHucUsed && (!m_hevcPicParams->tiles_enabled_flag))
2979     {
2980         // 2nd level batch buffer
2981         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2982         HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0);
2983         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]), release_func);
2984 
2985         // save offset for next 2nd level batch buffer usage
2986         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2987     }
2988     // When tile is enabled, below commands are needed for each tile instead of each picture
2989     else if (!m_hevcPicParams->tiles_enabled_flag)
2990     {
2991         SetAddCommands(CODECHAL_CMD1, &cmdBuffer, true, m_roundInterValue, m_roundIntraValue, m_lowDelay);
2992 
2993         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPicStateCmd(&cmdBuffer, &picStateParams), release_func);
2994 
2995         SetAddCommands(CODECHAL_CMD2, &cmdBuffer, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered);
2996     }
2997 
2998     // Send HEVC_VP9_RDOQ_STATE command
2999     if (m_hevcRdoqEnabled && !m_hevcPicParams->tiles_enabled_flag)
3000     {
3001         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams), release_func);
3002     }
3003 
3004     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(ReturnCommandBuffer(&cmdBuffer), release_func);
3005 
3006     m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3007 
3008     return eStatus;
3009 }
3010 
ExecuteSliceLevel()3011 MOS_STATUS CodechalVdencHevcStateG12::ExecuteSliceLevel()
3012 {
3013     CODECHAL_ENCODE_FUNCTION_ENTER;
3014 
3015     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3016 
3017     if (!m_hevcPicParams->tiles_enabled_flag)
3018     {
3019         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ExecuteSliceLevel());
3020 
3021         if (m_lookaheadPass)
3022         {
3023             CODECHAL_ENCODE_CHK_STATUS_RETURN(AnalyzeLookaheadStats());
3024 
3025             CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3026                 &m_vdencLaStatsBuffer,
3027                 CodechalDbgAttr::attrVdencOutput,
3028                 "_LookaheadStats",
3029                 m_brcLooaheadStatsBufferSize,
3030                 0,
3031                 CODECHAL_NUM_MEDIA_STATES)));
3032         }
3033     }
3034     else
3035     {
3036         if (m_vdencHucUsed && m_enableTileReplay)
3037         {
3038             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncWithTileRowLevelBRC());
3039         }
3040         else
3041         {
3042             CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
3043         }
3044     }
3045 
3046     return eStatus;
3047 }
3048 
EncTileLevel()3049 MOS_STATUS CodechalVdencHevcStateG12::EncTileLevel()
3050 {
3051     CODECHAL_ENCODE_FUNCTION_ENTER;
3052 
3053     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3054 
3055     int32_t currentPipe = GetCurrentPipe();
3056     int32_t currentPass = GetCurrentPass();
3057 
3058     if (currentPipe < 0 || currentPass < 0)
3059     {
3060         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
3061         return MOS_STATUS_INVALID_PARAMETER;
3062     }
3063 
3064     // Currently this implementation is only for CQP, single pass
3065     // Allocate more tile batch when try multiple passes
3066     if (IsFirstPass() && IsFirstPipe() && (!m_osInterface->bUsesPatchList))
3067     {
3068         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileLevelBatch());
3069     }
3070 
3071     PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams());
3072     ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
3073 
3074     auto release_func = [&]()
3075     {
3076         m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3077         pipeModeSelectParams = nullptr;
3078     };
3079 
3080     SetHcpPipeModeSelectParams(*pipeModeSelectParams);
3081     SetVdencPipeModeSelectParams(*pipeModeSelectParams);
3082 
3083     MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
3084     SetHcpSliceStateCommonParams(sliceState);
3085 
3086     MOS_COMMAND_BUFFER cmdBuffer;
3087     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(GetCommandBuffer(&cmdBuffer), release_func);
3088 
3089     MHW_MI_VD_CONTROL_STATE_PARAMS     vdControlStateParams;
3090     uint32_t                           numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
3091     uint32_t                           numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
3092     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
3093 
3094     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
3095 
3096     // Construct The third level batch buffer
3097     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(ConstructTLB(&m_thirdLevelBatchBuffer), release_func);
3098 
3099     for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
3100     {
3101         for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
3102         {
3103             PCODEC_ENCODER_SLCDATA  slcData = m_slcData;
3104             uint32_t                slcCount, idx, sliceNumInTile = 0;
3105 
3106             idx = tileRow * numTileColumns + tileCol;
3107 
3108             if ((m_numPipe > 1) && (tileCol != currentPipe))
3109             {
3110                 continue;
3111             }
3112 
3113             MOS_COMMAND_BUFFER  tileBatchBuf;
3114             PMOS_COMMAND_BUFFER tempCmdBuf = &cmdBuffer;
3115             uint8_t             *data      = nullptr;
3116 
3117             // Move tile level commands to first level command buffer when use patch list.
3118             if (!m_osInterface->bUsesPatchList)
3119             {
3120                 MOS_LOCK_PARAMS lockFlags;
3121                 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3122                 lockFlags.WriteOnly = true;
3123 
3124                 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_tileLevelBatchBuffer[currentPass][idx].OsResource), &lockFlags);
3125                 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(data, release_func);
3126 
3127                 MOS_ZeroMemory(&tileBatchBuf, sizeof(tileBatchBuf));
3128                 tileBatchBuf.pCmdBase = tileBatchBuf.pCmdPtr = (uint32_t *)data;
3129                 tileBatchBuf.iRemaining = m_tileLevelBatchSize;
3130 
3131                 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource, 0, true, 0);
3132                 // Add batch buffer start for tile
3133                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_tileLevelBatchBuffer[currentPass][idx]), release_func);
3134 
3135                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer), release_func);
3136 
3137                 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3138                 {
3139                     CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface(), release_func);
3140 
3141                     // Lazy allocation
3142                     if (Mos_ResourceIsNull(&m_resHwCountTileReplay))
3143                     {
3144                         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3145                         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3146                         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3147                         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3148                         allocParamsForBufferLinear.Format = Format_Buffer;
3149 
3150                         uint32_t maxTileRow = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
3151                         uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
3152 
3153                         allocParamsForBufferLinear.dwBytes = maxTileRow*maxTileColumn*(sizeof(HwCounter));
3154                         allocParamsForBufferLinear.pBufName = "HWCounter";
3155                         allocParamsForBufferLinear.bIsPersistent = true;
3156                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_osInterface->pfnAllocateResource(
3157                             m_osInterface,
3158                             &allocParamsForBufferLinear,
3159                             &m_resHwCountTileReplay),
3160                             release_func);
3161                         allocParamsForBufferLinear.bIsPersistent = false;
3162                     }
3163 
3164                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW(
3165                         m_osInterface,
3166                         &tileBatchBuf,
3167                         &m_resHwCountTileReplay,
3168                         (uint16_t)idx),
3169                         release_func);
3170                 }
3171 
3172                 tempCmdBuf = &tileBatchBuf;
3173             }
3174 
3175             // Construct the tile batch
3176             // To be moved to one sub function later
3177             // HCP Lock for multiple pipe mode
3178             if (m_numPipe > 1)
3179             {
3180                 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3181                 vdControlStateParams.scalableModePipeLock = true;
3182                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(tempCmdBuf, &vdControlStateParams), release_func);
3183             }
3184             // VDENC_PIPE_MODE_SELECT
3185             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(tempCmdBuf, pipeModeSelectParams), release_func);
3186             // HCP_PIPE_MODE_SELECT
3187             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(tempCmdBuf, pipeModeSelectParams), release_func);
3188 
3189             // 3rd level batch buffer
3190             if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3191             {
3192                 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0);
3193                 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
3194                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(tempCmdBuf, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]), release_func);
3195                 if (m_hevcRdoqEnabled)
3196                 {
3197                     MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
3198                     SetHcpPicStateParams(picStateParams);
3199                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(tempCmdBuf, &picStateParams), release_func);
3200                 }
3201             }
3202             else
3203             {
3204                 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_thirdLevelBatchBuffer.OsResource, 0, true, 0);
3205                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(tempCmdBuf, &m_thirdLevelBatchBuffer), release_func);
3206             }
3207 
3208             // HCP_TILE_CODING commmand
3209             // Set Tile replay related parameters
3210             tileParams[idx].IsFirstPass        = IsFirstPass();
3211             tileParams[idx].IsLastPass         = IsLastPass();
3212             tileParams[idx].bTileReplayEnable  = m_enableTileReplay;
3213             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(tempCmdBuf, &tileParams[idx]), release_func);
3214 
3215             for (slcCount = 0; slcCount < m_numSlices; slcCount++)
3216             {
3217                 bool lastSliceInTile = false, sliceInTile = false;
3218 
3219                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(IsSliceInTile(slcCount,
3220                     &tileParams[idx],
3221                     &sliceInTile,
3222                     &lastSliceInTile),
3223                     release_func);
3224 
3225                 if (!sliceInTile)
3226                 {
3227                     continue;
3228                 }
3229 
3230                 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3231                 {
3232                     // save offset for next 2nd level batch buffer usage
3233                     // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
3234                     // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
3235                     // m_vdencBatchBufferPerSliceVarSize:   variable size for each slice
3236 
3237                     // starting location for executing slice level cmds
3238                     // To do: Improvize to only add current slice wSlcCount
3239                     m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
3240 
3241                     for (uint32_t j = 0; j < slcCount; j++)
3242                     {
3243                         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset
3244                             += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]);
3245                     }
3246 
3247                 }
3248 
3249                 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx);
3250 
3251                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHwSliceEncodeCommand(tempCmdBuf, &sliceState), release_func);
3252 
3253                 // Send VD_PIPELINE_FLUSH command  for each slice
3254                 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3255                 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
3256                 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
3257                 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
3258                 vdPipelineFlushParams.Flags.bFlushHEVC  = 1;
3259                 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3260                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(tempCmdBuf, &vdPipelineFlushParams), release_func);
3261 
3262                 sliceNumInTile++;
3263             } // end of slice
3264 
3265             if (0 == sliceNumInTile)
3266             {
3267                 // One tile must have at least one slice
3268                 CODECHAL_ENCODE_ASSERT(false);
3269                 eStatus = MOS_STATUS_INVALID_PARAMETER;
3270                 break;
3271             }
3272 
3273             if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
3274             {
3275                 CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
3276                 release_func();
3277                 return MOS_STATUS_INVALID_PARAMETER;
3278             }
3279 
3280             //HCP unLock for multiple pipe mode
3281             if (m_numPipe > 1)
3282             {
3283                 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3284                 vdControlStateParams.scalableModePipeUnlock = true;
3285                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(tempCmdBuf, &vdControlStateParams), release_func);
3286             }
3287 
3288             // Send VD_PIPELINE_FLUSH command
3289             MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3290             vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3291             vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3292             vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3293             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(tempCmdBuf, &vdPipelineFlushParams), release_func);
3294 
3295             // Send MI_FLUSH command
3296             MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3297             MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3298             flushDwParams.bVideoPipelineCacheInvalidate = true;
3299             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiFlushDwCmd(tempCmdBuf, &flushDwParams), release_func);
3300 
3301             // Update head pointer for capture mode
3302             if (m_CaptureModeEnable && IsLastPipe())
3303             {
3304                 MHW_MI_LOAD_REGISTER_IMM_PARAMS     registerImmParams;
3305                 MOS_ZeroMemory(&registerImmParams, sizeof(registerImmParams));
3306                 registerImmParams.dwData      = 1;
3307                 registerImmParams.dwRegister  = m_VdboxVDENCRegBase[currentPipe] + 0x90;
3308                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(tempCmdBuf, &registerImmParams), release_func);
3309             }
3310 
3311             if (!m_osInterface->bUsesPatchList)
3312             {
3313                 // Add batch buffer end at the end of each tile batch, 2nd level batch buffer
3314                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferEnd(tempCmdBuf, nullptr), release_func);
3315 
3316                 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]_TILELEVEL";
3317                 CODECHAL_DEBUG_TOOL(
3318                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_debugInterface->DumpCmdBuffer(
3319                         tempCmdBuf,
3320                         CODECHAL_NUM_MEDIA_STATES,
3321                         pakPassName.data()),
3322                         release_func);)
3323 
3324                 if (data)
3325                 {
3326                     m_osInterface->pfnUnlockResource(m_osInterface, &(m_tileLevelBatchBuffer[currentPass][idx].OsResource));
3327                 }
3328             }
3329 
3330         } // end of row tile
3331     } // end of column tile
3332 
3333     m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3334 
3335     // Insert end of sequence/stream if set
3336     // To be moved to slice level?
3337     if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
3338     {
3339         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3340         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3341         pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
3342         pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
3343         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
3344     }
3345 
3346     // Send VD_CONTROL_STATE (Memory Implicit Flush)
3347     MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3348     vdControlStateParams.memoryImplicitFlush = true;
3349     CODECHAL_ENCODE_CHK_STATUS_RETURN(
3350         static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams));
3351 
3352 
3353     // Send VD_PIPELINE_FLUSH command
3354     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3355     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3356     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3357     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3358     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
3359 
3360     // Send MI_FLUSH command
3361     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3362     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3363     flushDwParams.bVideoPipelineCacheInvalidate = true;
3364     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3365 
3366     // Set the HW semaphore to indicate current pipe done
3367     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3368     flushDwParams.bVideoPipelineCacheInvalidate = true;
3369     if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
3370     {
3371         flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource;
3372         flushDwParams.dwDataDW1 = currentPass + 1;
3373     }
3374     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3375 
3376     if (IsFirstPipe())
3377     {
3378         // first pipe needs to ensure all other pipes are ready
3379         for (uint32_t i = 0; i < m_numPipe; i++)
3380         {
3381             if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource))
3382             {
3383                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3384                     SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource,
3385                         &cmdBuffer,
3386                         currentPass + 1));
3387                 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3388                     SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource,
3389                         &cmdBuffer,
3390                         0x0));
3391             }
3392         }
3393 
3394         // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed.
3395         // ACQP/ BRC need PAK integration kernel to aggregate statistics
3396         if (m_vdencHucUsed)
3397         {
3398             CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
3399         }
3400 
3401         // Use HW stitch commands only in the scalable mode
3402         // For single pipe with tile replay, stitch also needed
3403         if (m_enableTileStitchByHW)
3404         {
3405             if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP && !m_hevcVdencAcqpEnabled)
3406             {
3407                 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrateStitch(&cmdBuffer));
3408             }
3409             // 2nd level BB buffer for stitching cmd
3410             // current location to add cmds in 2nd level batch buffer
3411             m_HucStitchCmdBatchBuffer.iCurrent = 0;
3412             // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
3413             m_HucStitchCmdBatchBuffer.dwOffset = 0;
3414             HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_HucStitchCmdBatchBuffer.OsResource, 0, true, 0);
3415             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
3416             // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
3417             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
3418         }
3419 
3420         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
3421 
3422         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
3423 
3424         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
3425 
3426         if (m_numPipe <= 1)  // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
3427         {
3428             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
3429 
3430             // BRC PAK statistics different for each pass
3431             if (m_brcEnabled)
3432             {
3433                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
3434             }
3435         }
3436 
3437         MHW_MI_STORE_DATA_PARAMS    storeDataParams;
3438         // Signal HW semaphore for the reference frame dependency (i.e., current coding frame waits for the reference frame being ready)
3439         if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource))
3440         {
3441             // the reference frame semaphore must be set in each pass because of the conditional BRC batch buffer. Some BRC passes could be skipped.
3442             MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3443             storeDataParams.pOsResource      = &m_currRefSync->resSemaphoreMem.sResource;
3444             storeDataParams.dwResourceOffset = 0;
3445             storeDataParams.dwValue = 1;
3446 
3447             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(
3448                 &cmdBuffer,
3449                 &storeDataParams));
3450         }
3451     }
3452 
3453     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
3454     {
3455         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3456     }
3457 
3458     std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]";
3459     CODECHAL_DEBUG_TOOL(
3460         CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
3461             &cmdBuffer,
3462             CODECHAL_NUM_MEDIA_STATES,
3463             pakPassName.data()));)
3464 
3465     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3466 
3467     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
3468     {
3469         bool nullRendering = m_videoContextUsesNullHw;
3470 
3471         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
3472 
3473         CODECHAL_DEBUG_TOOL(
3474             CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
3475             if (m_mmcState)
3476             {
3477                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
3478             }
3479         )
3480 
3481         if (IsFirstPipe() &&
3482             IsLastPass() &&
3483             m_signalEnc &&
3484             m_currRefSync &&
3485             !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
3486         {
3487             // signal semaphore
3488             MOS_SYNC_PARAMS syncParams;
3489             syncParams                  = g_cInitSyncParams;
3490             syncParams.GpuContext       = m_videoContext;
3491             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
3492 
3493             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
3494             m_currRefSync->uiSemaphoreObjCount++;
3495             m_currRefSync->bInUsed = true;
3496             }
3497     }
3498 
3499     // Reset parameters for next PAK execution
3500     if (IsLastPipe() &&
3501         IsLastPass())
3502     {
3503         if (!m_singleTaskPhaseSupported)
3504         {
3505             m_osInterface->pfnResetPerfBufferID(m_osInterface);
3506         }
3507 
3508         m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
3509 
3510         m_newPpsHeader = 0;
3511         m_newSeqHeader = 0;
3512         m_frameNum++;
3513     }
3514 
3515     return eStatus;
3516 }
3517 
EncWithTileRowLevelBRC()3518 MOS_STATUS CodechalVdencHevcStateG12::EncWithTileRowLevelBRC()
3519 {
3520     CODECHAL_ENCODE_FUNCTION_ENTER;
3521 
3522     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3523 
3524     int32_t currentPipe = GetCurrentPipe();
3525     int32_t currentPass = GetCurrentPass();
3526 
3527     if (currentPipe < 0 || currentPass < 0)
3528     {
3529         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
3530         return MOS_STATUS_INVALID_PARAMETER;
3531     }
3532 
3533     // Revisit the buffer reuse for multiple frames later
3534     if (IsFirstPass() && IsFirstPipe())
3535     {
3536         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileLevelBatch());
3537         CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileRowLevelBRCBatch());
3538     }
3539 
3540     PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams());
3541     ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
3542 
3543     auto release_func = [&]()
3544     {
3545         m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3546         pipeModeSelectParams = nullptr;
3547     };
3548 
3549     SetHcpPipeModeSelectParams(*pipeModeSelectParams);
3550     SetVdencPipeModeSelectParams(*pipeModeSelectParams);
3551 
3552     MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
3553     SetHcpSliceStateCommonParams(sliceState);
3554 
3555     MOS_COMMAND_BUFFER cmdBuffer;
3556     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(GetCommandBuffer(&cmdBuffer), release_func);
3557 
3558     MHW_MI_VD_CONTROL_STATE_PARAMS     vdControlStateParams;
3559     uint32_t                           numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
3560     uint32_t                           numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
3561 
3562     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
3563     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
3564 
3565     m_FrameLevelBRCForTileRow = false;
3566     m_TileRowLevelBRC = true;
3567 
3568     for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
3569     {
3570         for (m_tileRowPass = 0; m_tileRowPass < m_NumPassesForTileReplay; m_tileRowPass++)
3571         {
3572             for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
3573             {
3574                 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
3575                 uint32_t               slcCount, idx, sliceNumInTile = 0;
3576 
3577                 idx = tileRow * numTileColumns + tileCol;
3578 
3579                 if ((m_numPipe > 1) && (tileCol != currentPipe))
3580                 {
3581                     continue;
3582                 }
3583 
3584                 MOS_LOCK_PARAMS lockFlags;
3585                 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3586                 lockFlags.WriteOnly = true;
3587 
3588                 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource), &lockFlags);
3589                 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(data, release_func);
3590 
3591                 MOS_COMMAND_BUFFER tileBatchBuf;
3592                 MOS_ZeroMemory(&tileBatchBuf, sizeof(tileBatchBuf));
3593                 tileBatchBuf.pCmdBase = tileBatchBuf.pCmdPtr = (uint32_t *)data;
3594                 tileBatchBuf.iRemaining = m_tileLevelBatchSize;
3595 
3596                 // Add batch buffer start for tile
3597                 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource, 0, true, 0);
3598                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_tileLevelBatchBuffer[m_tileRowPass][idx]), release_func);
3599 
3600                 if (m_numPipe > 1)
3601                 {
3602                     //wait for last tile row BRC update completion
3603                     if ((!IsFirstPipe()) && (!IsFirstPassForTileReplay()))
3604                     {
3605                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHWWaitCommand(&m_resTileRowBRCsyncSemaphore, &tileBatchBuf, 0xFF), release_func);
3606                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SetSemaphoreMem(&m_resTileRowBRCsyncSemaphore, &tileBatchBuf, 0x0), release_func);
3607                     }
3608                 }
3609 
3610                 // Add conditional batch buffer end before tile row level second pass
3611                 // To unify the single pipe and multiple pipe cases, add this for each tile
3612 
3613                 // To add the sync logic here to make sure the previous tile row BRC update is done
3614 
3615                 if (!IsFirstPassForTileReplay())
3616                 {
3617                     MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS  miEnhancedConditionalBatchBufferEndParams;
3618 
3619                     MOS_ZeroMemory(
3620                         &miEnhancedConditionalBatchBufferEndParams,
3621                         sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
3622 
3623                     // VDENC uses HuC FW generated semaphore for conditional 2nd pass
3624                     miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer =
3625                         &m_resPakMmioBuffer;
3626 
3627                     miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
3628                     miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = true;
3629 
3630                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
3631                         &tileBatchBuf,
3632                         (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)),
3633                         release_func);
3634                 }
3635 
3636                 // counter should be read after conditional batch buffer
3637                 // in case second pass is not executed then counter should not be read
3638                 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3639                 {
3640                     CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface(), release_func);
3641 
3642                     // Lazy allocation
3643                     if (Mos_ResourceIsNull(&m_resHwCountTileReplay))
3644                     {
3645                         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3646                         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3647                         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3648                         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3649                         allocParamsForBufferLinear.Format = Format_Buffer;
3650 
3651                         uint32_t maxTileRow = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
3652                         uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
3653 
3654                         allocParamsForBufferLinear.dwBytes = maxTileRow*maxTileColumn*(sizeof(HwCounter));
3655                         allocParamsForBufferLinear.pBufName = "HWCounter";
3656                         allocParamsForBufferLinear.bIsPersistent = true;
3657                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_osInterface->pfnAllocateResource(
3658                             m_osInterface,
3659                             &allocParamsForBufferLinear,
3660                             &m_resHwCountTileReplay),
3661                             release_func);
3662                         allocParamsForBufferLinear.bIsPersistent = false;
3663                     }
3664 
3665                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW(
3666                         m_osInterface,
3667                         &tileBatchBuf,
3668                         &m_resHwCountTileReplay,
3669                         (uint16_t)idx),
3670                         release_func);
3671                 }
3672 
3673                 // Construct the tile batch
3674                 // To be moved to one sub function later
3675                 // HCP Lock for multiple pipe mode
3676                 if (m_numPipe > 1)
3677                 {
3678                     MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3679                     vdControlStateParams.scalableModePipeLock = true;
3680                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(&tileBatchBuf, &vdControlStateParams), release_func);
3681                 }
3682 
3683                 // VDENC_PIPE_MODE_SELECT
3684                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&tileBatchBuf, pipeModeSelectParams), release_func);
3685                 // HCP_PIPE_MODE_SELECT
3686                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&tileBatchBuf, pipeModeSelectParams), release_func);
3687 
3688                 // 3rd level batch buffer
3689                 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3690                 {
3691                     m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
3692                     HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0);
3693                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&tileBatchBuf, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]), release_func);
3694 
3695                     if (m_hevcRdoqEnabled)
3696                     {
3697                         MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
3698                         SetHcpPicStateParams(picStateParams);
3699                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&tileBatchBuf, &picStateParams), release_func);
3700                     }
3701                 }
3702 
3703                 // HCP_TILE_CODING command
3704                 // Set Tile replay related parameters
3705                 tileParams[idx].IsFirstPass = IsFirstPassForTileReplay();
3706                 tileParams[idx].IsLastPass = IsLastPassForTileReplay();
3707                 tileParams[idx].bTileReplayEnable = m_enableTileReplay;
3708                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&tileBatchBuf, &tileParams[idx]), release_func);
3709 
3710                 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
3711                 {
3712                     bool lastSliceInTile = false, sliceInTile = false;
3713 
3714                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(IsSliceInTile(slcCount,
3715                         &tileParams[idx],
3716                         &sliceInTile,
3717                         &lastSliceInTile),
3718                         release_func);
3719 
3720                     if (!sliceInTile)
3721                     {
3722                         continue;
3723                     }
3724 
3725                     if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3726                     {
3727                         // save offset for next 2nd level batch buffer usage
3728                         // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
3729                         // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
3730                         // m_vdencBatchBufferPerSliceVarSize:   variable size for each slice
3731 
3732                         // starting location for executing slice level cmds
3733                         // To do: Improvize to only add current slice wSlcCount
3734                         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
3735 
3736                         for (uint32_t j = 0; j < slcCount; j++)
3737                         {
3738                             m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset
3739                                 += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]);
3740                         }
3741                     }
3742 
3743                     SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx);
3744 
3745                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHwSliceEncodeCommand(&tileBatchBuf, &sliceState), release_func);
3746 
3747                     // Send VD_PIPELINE_FLUSH command  for each slice
3748                     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3749                     vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
3750                     vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
3751                     vdPipelineFlushParams.Flags.bFlushVDENC = 1;
3752                     vdPipelineFlushParams.Flags.bFlushHEVC  = 1;
3753                     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3754                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileBatchBuf, &vdPipelineFlushParams), release_func);
3755 
3756                     sliceNumInTile++;
3757                 } // end of slice
3758 
3759                 if (0 == sliceNumInTile)
3760                 {
3761                     // One tile must have at least one slice
3762                     CODECHAL_ENCODE_ASSERT(false);
3763                     eStatus = MOS_STATUS_INVALID_PARAMETER;
3764                     break;
3765                 }
3766 
3767                 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
3768                 {
3769                     CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
3770                     release_func();
3771                     return MOS_STATUS_INVALID_PARAMETER;
3772                 }
3773 
3774                 //HCP unLock for multiple pipe mode
3775                 if (m_numPipe > 1)
3776                 {
3777                     MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3778                     vdControlStateParams.scalableModePipeUnlock = true;
3779                     CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(&tileBatchBuf, &vdControlStateParams), release_func);
3780                 }
3781 
3782                 // Send VD_PIPELINE_FLUSH command
3783                 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3784                 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3785                 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3786                 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3787                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileBatchBuf, &vdPipelineFlushParams), release_func);
3788 
3789                 // Send MI_FLUSH command
3790                 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3791                 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3792                 flushDwParams.bVideoPipelineCacheInvalidate = true;
3793                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiFlushDwCmd(&tileBatchBuf, &flushDwParams), release_func);
3794 
3795                 // Add batch buffer end at the end of each tile batch, 2nd level batch buffer
3796                 (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->iCurrent = tileBatchBuf.iOffset;
3797                 (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->iRemaining = tileBatchBuf.iRemaining;
3798                 (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->pData = data;
3799                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &m_tileLevelBatchBuffer[m_tileRowPass][idx]), release_func);
3800 
3801                 if (data)
3802                 {
3803                     m_osInterface->pfnUnlockResource(m_osInterface, &(m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource));
3804                 }
3805             } // end of row tile
3806 
3807             // Set the semaphore for tile row BRC update
3808             if ((m_numPipe > 1) && (!IsFirstPipe()) && (!IsLastPassForTileReplay()))
3809             {
3810                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(
3811                     SetSemaphoreMem(
3812                         &m_resVdBoxSemaphoreMem[currentPipe].sResource,
3813                         &cmdBuffer,
3814                         0xFF),
3815                     release_func);
3816             }
3817 
3818             //turn on protection again in case conditionalbatchbufferexit turns off the protection
3819             if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3820             {
3821                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer), release_func);
3822             }
3823 
3824             // Run tile row based BRC on pipe 0
3825             if (IsFirstPipe() && (!IsLastPassForTileReplay()))
3826             {
3827                 m_CurrentTileRow           = tileRow;
3828                 m_CurrentPassForTileReplay = m_tileRowPass;
3829                 m_CurrentPassForOverAll++;
3830 
3831                 // Before tile row BRC update, make sure all pipes are complete
3832                 if (m_numPipe > 1)
3833                 {
3834                     for (uint32_t i = 1; i < m_numPipe; i++)
3835                     {
3836                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0xFF), release_func);
3837                         CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0x0), release_func);
3838                     }
3839                 }
3840 
3841                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(HuCBrcTileRowUpdate(&cmdBuffer), release_func);
3842             }
3843 
3844             //turn on protection again in case conditionalbatchbufferexit turns off the protection
3845             if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3846             {
3847                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer), release_func);
3848             }
3849 
3850             //Refresh counter after every tilerowpass
3851             if (m_tileRowPass < m_NumPassesForTileReplay - 1)
3852             {
3853                 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->RefreshCounter(m_osInterface, &cmdBuffer), release_func);
3854             }
3855         }
3856 
3857         // Update head pointer for capture mode
3858         if (m_CaptureModeEnable && IsLastPipe())
3859         {
3860             MHW_MI_LOAD_REGISTER_IMM_PARAMS     registerImmParams;
3861             MOS_ZeroMemory(&registerImmParams, sizeof(registerImmParams));
3862             registerImmParams.dwData      = 1;
3863             registerImmParams.dwRegister  = m_VdboxVDENCRegBase[currentPipe] + 0x90;
3864             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(&cmdBuffer, &registerImmParams), release_func);
3865         }
3866 
3867         //refresh encode counter after every rowpass
3868         if (tileRow < numTileRows - 1)
3869         {
3870             CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->RefreshCounter(m_osInterface, &cmdBuffer), release_func);
3871         }
3872     }
3873 
3874     m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3875 
3876     // Insert end of sequence/stream if set
3877     // To be moved to slice level?
3878     if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
3879     {
3880         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3881         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3882         pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
3883         pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
3884         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
3885     }
3886 
3887     // Send VD_CONTROL_STATE (Memory Implict Flush)
3888     MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3889     vdControlStateParams.memoryImplicitFlush = true;
3890     CODECHAL_ENCODE_CHK_STATUS_RETURN(
3891         static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams));
3892 
3893     // Send VD_PIPELINE_FLUSH command
3894     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3895     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3896     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3897     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3898     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
3899 
3900     // Send MI_FLUSH command
3901     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3902     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3903     flushDwParams.bVideoPipelineCacheInvalidate = true;
3904     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3905 
3906     // Set the HW semaphore to indicate current pipe done
3907     if (m_numPipe > 1)
3908     {
3909         MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3910         flushDwParams.bVideoPipelineCacheInvalidate = true;
3911         if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
3912         {
3913             flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource;
3914             flushDwParams.dwDataDW1   = 0xFF;
3915         }
3916         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3917     }
3918 
3919     if (IsFirstPipe())
3920     {
3921         // first pipe needs to ensure all other pipes are ready
3922         if (m_numPipe > 1)
3923         {
3924             for (uint32_t i = 0; i < m_numPipe; i++)
3925             {
3926                 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource))
3927                 {
3928                     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0xFF));
3929                     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0x0));
3930                 }
3931             }
3932         }
3933 
3934         // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed.
3935         // ACQP/ BRC need PAK integration kernel to aggregate statistics
3936         if (m_vdencHucUsed)
3937         {
3938             CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
3939         }
3940 
3941         // Use HW stitch commands only in the scalable mode
3942         // For single pipe with tile replay, stitch also needed
3943         if (m_enableTileStitchByHW)
3944         {
3945             // 2nd level BB buffer for stitching cmd
3946             // current location to add cmds in 2nd level batch buffer
3947             m_HucStitchCmdBatchBuffer.iCurrent = 0;
3948             // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
3949             m_HucStitchCmdBatchBuffer.dwOffset = 0;
3950             HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_HucStitchCmdBatchBuffer.OsResource, 0, true, 0);
3951             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
3952             // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
3953             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
3954         }
3955 
3956         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
3957 
3958         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
3959 
3960         if (m_numPipe <= 1)  // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
3961         {
3962             CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
3963 
3964             // BRC PAK statistics different for each pass
3965             if (m_brcEnabled)
3966             {
3967                 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
3968             }
3969         }
3970 
3971         MHW_MI_STORE_DATA_PARAMS    storeDataParams;
3972         // Signal HW semaphore for the reference frame dependency (i.e., current coding frame waits for the reference frame being ready)
3973         if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource))
3974         {
3975             // the reference frame semaphore must be set in each pass because of the conditional BRC batch buffer. Some BRC passes could be skipped.
3976             MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3977             storeDataParams.pOsResource      = &m_currRefSync->resSemaphoreMem.sResource;
3978             storeDataParams.dwResourceOffset = 0;
3979             storeDataParams.dwValue = 1;
3980 
3981             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(
3982                 &cmdBuffer,
3983                 &storeDataParams));
3984         }
3985     }
3986 
3987     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase || (m_numPipe >= 2))
3988     {
3989         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3990     }
3991 
3992     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3993 
3994     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
3995     {
3996         bool nullRendering = m_videoContextUsesNullHw;
3997 
3998         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
3999 
4000         CODECHAL_DEBUG_TOOL(
4001             if (m_mmcState)
4002             {
4003                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
4004             }
4005         )
4006 
4007         if (IsFirstPipe() &&
4008             IsLastPass() &&
4009             m_signalEnc &&
4010             m_currRefSync &&
4011             !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
4012         {
4013             // signal semaphore
4014             MOS_SYNC_PARAMS syncParams;
4015             syncParams                  = g_cInitSyncParams;
4016             syncParams.GpuContext       = m_videoContext;
4017             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
4018 
4019             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
4020             m_currRefSync->uiSemaphoreObjCount++;
4021             m_currRefSync->bInUsed = true;
4022             }
4023     }
4024 
4025     // Reset parameters for next PAK execution
4026     if (IsLastPipe() &&
4027         IsLastPass())
4028     {
4029         if (!m_singleTaskPhaseSupported)
4030         {
4031             m_osInterface->pfnResetPerfBufferID(m_osInterface);
4032         }
4033 
4034         m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
4035 
4036         m_newPpsHeader = 0;
4037         m_newSeqHeader = 0;
4038         m_frameNum++;
4039     }
4040 
4041     return eStatus;
4042 }
4043 
ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)4044 MOS_STATUS CodechalVdencHevcStateG12::ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)
4045 {
4046     CODECHAL_ENCODE_FUNCTION_ENTER;
4047 
4048     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4049 
4050     CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
4051     CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
4052 
4053     MOS_LOCK_PARAMS lockFlags;
4054     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4055     lockFlags.WriteOnly = true;
4056 
4057     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
4058     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4059 
4060     MOS_COMMAND_BUFFER constructedCmdBuf;
4061     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
4062     constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
4063     constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
4064 
4065     // 1st Group : PIPE_MODE_SELECT
4066     // set PIPE_MODE_SELECT command
4067     // This is not needed for GEN11/GEN12 as single pass SAO is supported
4068     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams;
4069     pipeModeSelectParams.Mode = m_mode;
4070     pipeModeSelectParams.bVdencEnabled = true;
4071     pipeModeSelectParams.bAdvancedRateControlEnable = true;
4072     pipeModeSelectParams.bRdoqEnable                = m_hevcRdoqEnabled;
4073     pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
4074     pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
4075     pipeModeSelectParams.bStreamOutEnabled = 1;
4076     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&constructedCmdBuf, &pipeModeSelectParams));
4077 
4078     MHW_BATCH_BUFFER  TempBatchBuffer;
4079     MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER));
4080     TempBatchBuffer.iSize       = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
4081     TempBatchBuffer.pData       = data;
4082 
4083     // set MI_BATCH_BUFFER_END command
4084     int32_t cmdBufOffset = constructedCmdBuf.iOffset;
4085 
4086     TempBatchBuffer.iCurrent    = constructedCmdBuf.iOffset;
4087     TempBatchBuffer.iRemaining  = constructedCmdBuf.iRemaining;
4088     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
4089     constructedCmdBuf.pCmdPtr     += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
4090     constructedCmdBuf.iOffset      = TempBatchBuffer.iCurrent;
4091     constructedCmdBuf.iRemaining   = TempBatchBuffer.iRemaining;
4092 
4093     m_miBatchBufferEndCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4094     CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer1stGroupSize == constructedCmdBuf.iOffset);
4095 
4096     SetAddCommands(CODECHAL_CMD1, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay);
4097     m_picStateCmdStartInBytes = constructedCmdBuf.iOffset;
4098 
4099     // set HCP_PIC_STATE command
4100     MHW_VDBOX_HEVC_PIC_STATE_G12 hevcPicState;
4101     SetHcpPicStateParams(hevcPicState);
4102     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState));
4103     m_cmd2StartInBytes = constructedCmdBuf.iOffset;
4104 
4105     SetAddCommands(CODECHAL_CMD2, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered);
4106 
4107     // set MI_BATCH_BUFFER_END command
4108     TempBatchBuffer.iCurrent    = constructedCmdBuf.iOffset;
4109     TempBatchBuffer.iRemaining  = constructedCmdBuf.iRemaining;
4110     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
4111     constructedCmdBuf.pCmdPtr     += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
4112     constructedCmdBuf.iOffset      = TempBatchBuffer.iCurrent;
4113     constructedCmdBuf.iRemaining   = TempBatchBuffer.iRemaining;
4114 
4115     CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer2ndGroupSize + m_hwInterface->m_vdencBatchBuffer1stGroupSize
4116         == constructedCmdBuf.iOffset);
4117 
4118     // 3rd Group : HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
4119     MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
4120     SetHcpSliceStateCommonParams(sliceState);
4121 
4122     // slice level cmds for each slice
4123     PCODEC_ENCODER_SLCDATA slcData = m_slcData;
4124     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
4125 
4126     for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
4127     {
4128         bool lastSliceInTile = false, sliceInTile = false;
4129 
4130         if (IsFirstPass())
4131         {
4132             slcData[slcCount].CmdOffset = startLCU * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
4133         }
4134 
4135         uint32_t  numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
4136         uint32_t  numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
4137         uint32_t  idx = 0;
4138         for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
4139         {
4140             for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
4141             {
4142                 idx = tileRow * numTileColumns + tileCol;
4143                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
4144                     &tileParams[idx],
4145                     &sliceInTile,
4146                     &lastSliceInTile));
4147 
4148                 if (sliceInTile)
4149                 {
4150                     break;
4151                 }
4152             }
4153             if (sliceInTile)
4154             {
4155                 break;
4156             }
4157         }
4158 
4159         SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx);
4160 
4161         m_vdencBatchBufferPerSliceVarSize[slcCount] = 0;
4162 
4163         // set HCP_WEIGHTOFFSET_STATE command
4164         // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
4165         //        If zero, then this command is not issued.
4166         if (m_hevcVdencWeightedPredEnabled)
4167         {
4168             MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
4169             MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
4170             // HuC based WP ignores App based weights
4171             if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
4172             {
4173                 for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
4174                 {
4175                     // Luma, Chroma Offset
4176                     for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
4177                     {
4178                         hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)m_hevcSliceParams->luma_offset[k][i];
4179                         // Cb, Cr
4180                         for (auto j = 0; j < 2; j++)
4181                         {
4182                             hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)m_hevcSliceParams->chroma_offset[k][i][j];
4183                         }
4184                     }
4185 
4186                     // Luma Weight
4187                     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4188                         &hcpWeightOffsetParams.LumaWeights[k],
4189                         sizeof(hcpWeightOffsetParams.LumaWeights[k]),
4190                         &m_hevcSliceParams->delta_luma_weight[k],
4191                         sizeof(m_hevcSliceParams->delta_luma_weight[k])));
4192                     // Chroma Weight
4193                     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4194                         &hcpWeightOffsetParams.ChromaWeights[k],
4195                         sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
4196                         &m_hevcSliceParams->delta_chroma_weight[k],
4197                         sizeof(m_hevcSliceParams->delta_chroma_weight[k])));
4198                 }
4199             }
4200 
4201             // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
4202             if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4203             {
4204                 hcpWeightOffsetParams.ucList = LIST_0;
4205 
4206                 cmdBufOffset = constructedCmdBuf.iOffset;
4207                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
4208                 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4209                 // 1st HcpWeightOffset cmd is not always inserted (except weighted prediction + P, B slices)
4210                 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
4211             }
4212 
4213             // 2nd HCP_WEIGHTOFFSET_STATE cmd - B only
4214             if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4215             {
4216                 hcpWeightOffsetParams.ucList = LIST_1;
4217 
4218                 cmdBufOffset = constructedCmdBuf.iOffset;
4219                 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
4220                 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4221                 // 2nd HcpWeightOffset cmd is not always inserted (except weighted prediction + B slices)
4222                 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
4223             }
4224         }
4225 
4226         // set HCP_SLICE_STATE command
4227         cmdBufOffset = constructedCmdBuf.iOffset;
4228         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(&constructedCmdBuf, &sliceState));
4229         m_hcpSliceStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4230 
4231         // set 1st HCP_PAK_INSERT_OBJECT command
4232         // insert AU, SPS, PPS headers before first slice header
4233         if (sliceState.bInsertBeforeSliceHeaders)
4234         {
4235             uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for DwordLength field in PAK_INSERT_OBJ cmd
4236             m_1stPakInsertObjectCmdSize = 0;
4237 
4238             for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
4239             {
4240                 uint32_t nalUnitPosiSize = sliceState.ppNalUnitParams[i]->uiSize;
4241                 uint32_t nalUnitPosiOffset = sliceState.ppNalUnitParams[i]->uiOffset;
4242 
4243                 while (nalUnitPosiSize > 0)
4244                 {
4245                     uint32_t bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalUnitPosiSize * 8);
4246                     uint32_t offSet = nalUnitPosiOffset;
4247 
4248                     MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
4249                     MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
4250                     pakInsertObjectParams.bEmulationByteBitsInsert = sliceState.ppNalUnitParams[i]->bInsertEmulationBytes;
4251                     pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.ppNalUnitParams[i]->uiSkipEmulationCheckCount;
4252                     pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
4253                     pakInsertObjectParams.dwBitSize = bitSize;
4254                     pakInsertObjectParams.dwOffset = offSet;
4255 
4256                     if (nalUnitPosiSize > maxBytesInPakInsertObjCmd)
4257                     {
4258                         nalUnitPosiSize -= maxBytesInPakInsertObjCmd;
4259                         nalUnitPosiOffset += maxBytesInPakInsertObjCmd;
4260                     }
4261                     else
4262                     {
4263                         nalUnitPosiSize = 0;
4264                     }
4265 
4266                     cmdBufOffset = constructedCmdBuf.iOffset;
4267                     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&constructedCmdBuf, &pakInsertObjectParams));
4268 
4269                     // this info needed again in BrcUpdate HuC FW const
4270                     m_1stPakInsertObjectCmdSize += (constructedCmdBuf.iOffset - cmdBufOffset);
4271                 }
4272             }
4273             // 1st PakInsertObject cmd is not always inserted for each slice
4274             m_vdencBatchBufferPerSliceVarSize[slcCount] += m_1stPakInsertObjectCmdSize;
4275         }
4276 
4277         // set 2nd HCP_PAK_INSERT_OBJECT command
4278         // Insert slice header
4279         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
4280         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
4281         pakInsertObjectParams.bLastHeader = true;
4282         pakInsertObjectParams.bEmulationByteBitsInsert = true;
4283 
4284         // App does the slice header packing, set the skip count passed by the app
4285         pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.uiSkipEmulationCheckCount;
4286         pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
4287         pakInsertObjectParams.dwBitSize = sliceState.dwLength;
4288         pakInsertObjectParams.dwOffset = sliceState.dwOffset;
4289 
4290         // For HEVC VDEnc Dynamic Slice
4291         if (m_hevcSeqParams->SliceSizeControl)
4292         {
4293             pakInsertObjectParams.bLastHeader = false;
4294             pakInsertObjectParams.bEmulationByteBitsInsert = false;
4295             pakInsertObjectParams.dwBitSize                  = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
4296             pakInsertObjectParams.bResetBitstreamStartingPos = true;
4297         }
4298 
4299         uint32_t byteSize = (pakInsertObjectParams.dwBitSize + 7) >> 3;
4300         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
4301             &constructedCmdBuf,
4302             &pakInsertObjectParams));
4303 
4304         // 2nd PakInsertObject cmd is always inserted for each slice
4305         // so already reflected in dwVdencBatchBufferPerSliceConstSize
4306         m_vdencBatchBufferPerSliceVarSize[slcCount] += (MOS_ALIGN_CEIL(byteSize, sizeof(uint32_t))) / sizeof(uint32_t) * 4;
4307 
4308         // set 3rd HCP_PAK_INSERT_OBJECT command
4309         if (m_hevcSeqParams->SliceSizeControl)
4310         {
4311             // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
4312             pakInsertObjectParams.bLastHeader = true;
4313             pakInsertObjectParams.dwBitSize   = sliceState.dwLength - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
4314             pakInsertObjectParams.dwOffset += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8);  // Skips the first 5 bytes which is Start Code + Nal Unit Header
4315             pakInsertObjectParams.bResetBitstreamStartingPos = true;
4316 
4317             cmdBufOffset = constructedCmdBuf.iOffset;
4318             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
4319                 &constructedCmdBuf,
4320                 &pakInsertObjectParams));
4321             // 3rd PakInsertObject cmd is not always inserted for each slice
4322             m_vdencBatchBufferPerSliceVarSize[slcCount] += (constructedCmdBuf.iOffset - cmdBufOffset);
4323         }
4324 
4325         // set VDENC_WEIGHT_OFFSETS_STATE command
4326         MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
4327         MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
4328         vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
4329         vdencWeightOffsetParams.isLowDelay = m_lowDelay;
4330 
4331         if (vdencWeightOffsetParams.bWeightedPredEnabled)
4332         {
4333             uint8_t lumaLog2WeightDenom = m_hevcPicParams->bEnableGPUWeightedPrediction ? 6 : m_hevcSliceParams->luma_log2_weight_denom;
4334             vdencWeightOffsetParams.dwDenom = 1 << lumaLog2WeightDenom;
4335 
4336             if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
4337             {
4338                 // Luma Offsets
4339                 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
4340                 {
4341                     vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)m_hevcSliceParams->luma_offset[0][i];
4342                     vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)m_hevcSliceParams->luma_offset[1][i];
4343                 }
4344 
4345                 // Luma Weights
4346                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
4347                     &vdencWeightOffsetParams.LumaWeights[0],
4348                     sizeof(vdencWeightOffsetParams.LumaWeights[0]),
4349                     &m_hevcSliceParams->delta_luma_weight[0],
4350                     sizeof(m_hevcSliceParams->delta_luma_weight[0])),
4351                     "Failed to copy luma weight 0 memory.");
4352 
4353                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
4354                     &vdencWeightOffsetParams.LumaWeights[1],
4355                     sizeof(vdencWeightOffsetParams.LumaWeights[1]),
4356                     &m_hevcSliceParams->delta_luma_weight[1],
4357                     sizeof(m_hevcSliceParams->delta_luma_weight[1])),
4358                     "Failed to copy luma weight 1 memory.");
4359             }
4360         }
4361 
4362         cmdBufOffset = constructedCmdBuf.iOffset;
4363         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
4364             &constructedCmdBuf,
4365             nullptr,
4366             &vdencWeightOffsetParams));
4367         m_vdencWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4368 
4369         // set MI_BATCH_BUFFER_END command
4370         TempBatchBuffer.iCurrent    = constructedCmdBuf.iOffset;
4371         TempBatchBuffer.iRemaining  = constructedCmdBuf.iRemaining;
4372         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
4373         constructedCmdBuf.pCmdPtr     += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
4374         constructedCmdBuf.iOffset      = TempBatchBuffer.iCurrent;
4375         constructedCmdBuf.iRemaining   = TempBatchBuffer.iRemaining;
4376 
4377         m_vdencBatchBufferPerSliceVarSize[slcCount] += ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4;
4378         for (auto i = 0; i < ENCODE_VDENC_HEVC_PADDING_DW_SIZE ; i++)
4379         {
4380             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiNoop(&constructedCmdBuf, nullptr));
4381         }
4382         startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4383     }
4384 
4385     if (data)
4386     {
4387         m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
4388     }
4389 
4390     return eStatus;
4391 }
4392 
ConstructTLB(PMHW_BATCH_BUFFER thirdLevelBatchBuffer)4393 MOS_STATUS CodechalVdencHevcStateG12::ConstructTLB(PMHW_BATCH_BUFFER thirdLevelBatchBuffer)
4394 {
4395     CODECHAL_ENCODE_FUNCTION_ENTER;
4396 
4397     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4398 
4399     CODECHAL_ENCODE_CHK_NULL_RETURN(thirdLevelBatchBuffer);
4400 
4401     MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
4402     SetHcpPicStateParams(picStateParams);
4403 
4404     MOS_LOCK_PARAMS lockFlags;
4405     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4406     lockFlags.WriteOnly = true;
4407 
4408     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(thirdLevelBatchBuffer->OsResource), &lockFlags);
4409     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4410 
4411     MOS_COMMAND_BUFFER constructedCmdBuf;
4412     MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
4413     constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
4414     constructedCmdBuf.iRemaining = m_thirdLBSize;
4415 
4416     SetAddCommands(CODECHAL_CMD1, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay);
4417 
4418     // HCP_PIC_STATE
4419     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &picStateParams));
4420 
4421     SetAddCommands(CODECHAL_CMD2, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered);
4422 
4423     // Send HEVC_VP9_RDOQ_STATE command
4424     if (m_hevcRdoqEnabled)
4425     {
4426         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&constructedCmdBuf, &picStateParams));
4427     }
4428 
4429     thirdLevelBatchBuffer->iCurrent     = constructedCmdBuf.iOffset;
4430     thirdLevelBatchBuffer->iRemaining   = constructedCmdBuf.iRemaining;
4431     thirdLevelBatchBuffer->pData        = data;
4432     // set MI_BATCH_BUFFER_END command
4433     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, thirdLevelBatchBuffer));
4434 
4435     std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]_TLB";
4436     CODECHAL_DEBUG_TOOL(
4437         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
4438             &constructedCmdBuf,
4439             CODECHAL_NUM_MEDIA_STATES,
4440             pakPassName.data()));)
4441 
4442     if (data)
4443     {
4444         m_osInterface->pfnUnlockResource(m_osInterface, &(thirdLevelBatchBuffer->OsResource));
4445     }
4446 
4447     return eStatus;
4448 }
4449 
SetDmemHuCBrcInitReset()4450 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCBrcInitReset()
4451 {
4452     CODECHAL_ENCODE_FUNCTION_ENTER;
4453 
4454     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4455 
4456     MOS_LOCK_PARAMS lockFlagsWriteOnly;
4457     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4458     lockFlagsWriteOnly.WriteOnly = true;
4459 
4460     // Setup BrcInit DMEM
4461     auto hucVdencBrcInitDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12)m_osInterface->pfnLockResource(
4462         m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
4463     CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcInitDmem);
4464     MOS_ZeroMemory(hucVdencBrcInitDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12));
4465 
4466     hucVdencBrcInitDmem->BRCFunc_U32       = (m_enableTileReplay ? 1 : 0) << 7;  //bit0 0: Init; 1: Reset, bit7 0: frame-based; 1: tile-based
4467     hucVdencBrcInitDmem->UserMaxFrame      = GetProfileLevelMaxFrameSize();
4468     hucVdencBrcInitDmem->InitBufFull_U32   = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
4469     hucVdencBrcInitDmem->BufSize_U32       = m_hevcSeqParams->VBVBufferSizeInBit;
4470     hucVdencBrcInitDmem->TargetBitrate_U32 = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;  // map DDI params(in Kbits) to huc (in bits)
4471     hucVdencBrcInitDmem->MaxRate_U32       = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
4472     hucVdencBrcInitDmem->MinRate_U32 = 0;
4473     hucVdencBrcInitDmem->FrameRateM_U32    = m_hevcSeqParams->FrameRate.Numerator;
4474     hucVdencBrcInitDmem->FrameRateD_U32    = m_hevcSeqParams->FrameRate.Denominator;
4475     hucVdencBrcInitDmem->ACQP_U32          = 0;
4476     if (m_hevcSeqParams->UserMaxPBFrameSize > 0)
4477     {
4478         //Backup CodingType as need to set it as B_Tpye to get MaxFrameSize for P/B frames.
4479         auto CodingTypeTemp = m_hevcPicParams->CodingType;
4480         m_hevcPicParams->CodingType = B_TYPE;
4481         hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = GetProfileLevelMaxFrameSize();
4482         m_hevcPicParams->CodingType = CodingTypeTemp;
4483     }
4484     else
4485     {
4486         hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = hucVdencBrcInitDmem->UserMaxFrame;
4487     }
4488 
4489     if (m_brcEnabled)
4490     {
4491         switch (m_hevcSeqParams->RateControlMethod)
4492         {
4493         case RATECONTROL_ICQ:
4494             hucVdencBrcInitDmem->BRCFlag = 0;
4495             hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;
4496             break;
4497         case RATECONTROL_CBR:
4498             hucVdencBrcInitDmem->BRCFlag = 1;
4499             break;
4500         case RATECONTROL_VBR:
4501             hucVdencBrcInitDmem->BRCFlag = 2;
4502             hucVdencBrcInitDmem->ACQP_U32 = 0;
4503             break;
4504         case RATECONTROL_VCM:
4505             hucVdencBrcInitDmem->BRCFlag = 3;
4506             break;
4507         case RATECONTROL_QVBR:
4508             hucVdencBrcInitDmem->BRCFlag = 2;
4509             hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;;
4510             break;
4511         default:
4512             break;
4513         }
4514 
4515         // Low Delay BRC
4516         if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
4517         {
4518             hucVdencBrcInitDmem->BRCFlag = 5;
4519         }
4520 
4521         switch (m_hevcSeqParams->MBBRC)
4522         {
4523         case mbBrcInternal:
4524         case mbBrcEnabled:
4525             hucVdencBrcInitDmem->CuQpCtrl_U8 = 3;
4526             break;
4527         case mbBrcDisabled:
4528             hucVdencBrcInitDmem->CuQpCtrl_U8 = 0;
4529             break;
4530         default:
4531             break;
4532         }
4533     }
4534     else if (m_hevcVdencAcqpEnabled)
4535     {
4536         hucVdencBrcInitDmem->BRCFlag = 0;
4537 
4538         // 0=No CUQP; 1=CUQP for I-frame; 2=CUQP for P/B-frame
4539         // bit operation, bit 1 for I-frame, bit 2 for P/B frame
4540         // In VDENC mode, the field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
4541         if (m_hevcSeqParams->QpAdjustment)
4542         {
4543             hucVdencBrcInitDmem->CuQpCtrl_U8 = 3;  // wPictureCodingType I:0, P:1, B:2
4544         }
4545         else
4546         {
4547             hucVdencBrcInitDmem->CuQpCtrl_U8 = 0;  // wPictureCodingType I:0, P:1, B:2
4548         }
4549     }
4550 
4551     hucVdencBrcInitDmem->SSCFlag = m_hevcSeqParams->SliceSizeControl;
4552 
4553     hucVdencBrcInitDmem->FrameWidth_U16 = (uint16_t)m_frameWidth;
4554     hucVdencBrcInitDmem->FrameHeight_U16 = (uint16_t)m_frameHeight;
4555 
4556     hucVdencBrcInitDmem->MinQP_U8 = m_hevcPicParams->BRCMinQp < 10 ? 10 : m_hevcPicParams->BRCMinQp;                                           // Setting values from arch spec
4557     hucVdencBrcInitDmem->MaxQP_U8 = m_hevcPicParams->BRCMaxQp < 10 ? 51 : (m_hevcPicParams->BRCMaxQp > 51 ? 51 : m_hevcPicParams->BRCMaxQp);   // Setting values from arch spec
4558 
4559     hucVdencBrcInitDmem->BRCPyramidEnable_U8 = 0;
4560 
4561     //QP modulation settings
4562     m_hevcSeqParams->GopRefDist = m_hevcSeqParams->GopRefDist == 0 ? 1 : m_hevcSeqParams->GopRefDist;
4563     bool bAllowedPyramid = m_hevcSeqParams->GopRefDist != 3;
4564     uint16_t intraPeriod = m_hevcSeqParams->GopPicSize > 4001 ? 4000 : m_hevcSeqParams->GopPicSize - 1;
4565     intraPeriod = ((intraPeriod + m_hevcSeqParams->GopRefDist - 1) / m_hevcSeqParams->GopRefDist) * m_hevcSeqParams->GopRefDist;
4566 
4567     if (m_hevcSeqParams->HierarchicalFlag && bAllowedPyramid)
4568     {
4569         hucVdencBrcInitDmem->GopP_U16 = intraPeriod/m_hevcSeqParams->GopRefDist;
4570         hucVdencBrcInitDmem->GopB_U16 = hucVdencBrcInitDmem->GopP_U16;
4571         hucVdencBrcInitDmem->GopB1_U16 = ((hucVdencBrcInitDmem->GopP_U16 + hucVdencBrcInitDmem->GopB_U16) == intraPeriod) ? 0 : hucVdencBrcInitDmem->GopB_U16 * 2;
4572         hucVdencBrcInitDmem->GopB2_U16 = intraPeriod - hucVdencBrcInitDmem->GopP_U16 - hucVdencBrcInitDmem->GopB_U16 - hucVdencBrcInitDmem->GopB1_U16;
4573 
4574         hucVdencBrcInitDmem->MaxBRCLevel_U8 = hucVdencBrcInitDmem->GopB1_U16 == 0 ? HEVC_BRC_FRAME_TYPE_B : (hucVdencBrcInitDmem->GopB2_U16 == 0 ? HEVC_BRC_FRAME_TYPE_B1 : HEVC_BRC_FRAME_TYPE_B2);
4575         hucVdencBrcInitDmem->BRCPyramidEnable_U8 = 1;
4576     }
4577     else //FlatB or LDB
4578     {
4579         hucVdencBrcInitDmem->GopP_U16 = intraPeriod/m_hevcSeqParams->GopRefDist;
4580         hucVdencBrcInitDmem->GopB_U16 = intraPeriod - hucVdencBrcInitDmem->GopP_U16;
4581         hucVdencBrcInitDmem->MaxBRCLevel_U8 = hucVdencBrcInitDmem->GopB_U16 == 0? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
4582     }
4583 
4584     hucVdencBrcInitDmem->LumaBitDepth_U8   = m_hevcSeqParams->bit_depth_luma_minus8 + 8;
4585     hucVdencBrcInitDmem->ChromaBitDepth_U8 = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;
4586 
4587     if (m_hevcSeqParams->SourceBitDepth == ENCODE_HEVC_BIT_DEPTH_10)
4588     {
4589         hucVdencBrcInitDmem->LumaBitDepth_U8 = 10;
4590         hucVdencBrcInitDmem->ChromaBitDepth_U8 = 10;
4591     }
4592 
4593     if ((hucVdencBrcInitDmem->LowDelayMode_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)))
4594     {
4595         MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshPB0_S8, 8 * sizeof(int8_t), (void *)m_lowdelayDevThreshPB, 8 * sizeof(int8_t));
4596         MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshVBR0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshVBR, 8 * sizeof(int8_t));
4597         MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshI0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshI, 8 * sizeof(int8_t));
4598     }
4599     else
4600     {
4601         uint64_t inputbitsperframe = uint64_t(hucVdencBrcInitDmem->MaxRate_U32*100. / (hucVdencBrcInitDmem->FrameRateM_U32 * 100.0 / hucVdencBrcInitDmem->FrameRateD_U32));
4602         if (m_brcEnabled && !hucVdencBrcInitDmem->BufSize_U32)
4603         {
4604             CODECHAL_ENCODE_ASSERTMESSAGE("VBV BufSize should not be 0 for BRC case\n");
4605             eStatus = MOS_STATUS_INVALID_PARAMETER;
4606         }
4607         uint64_t vbvsz = hucVdencBrcInitDmem->BufSize_U32;
4608         double bps_ratio = inputbitsperframe / (vbvsz / m_devStdFPS);
4609         if (bps_ratio < m_bpsRatioLow) bps_ratio = m_bpsRatioLow;
4610         if (bps_ratio > m_bpsRatioHigh) bps_ratio = m_bpsRatioHigh;
4611 
4612         for (int i = 0; i < m_numDevThreshlds / 2; i++) {
4613             hucVdencBrcInitDmem->DevThreshPB0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshPBFPNEG[i], bps_ratio));
4614             hucVdencBrcInitDmem->DevThreshPB0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshPBFPPOS[i], bps_ratio));
4615 
4616             hucVdencBrcInitDmem->DevThreshI0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshIFPNEG[i], bps_ratio));
4617             hucVdencBrcInitDmem->DevThreshI0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshIFPPOS[i], bps_ratio));
4618 
4619             hucVdencBrcInitDmem->DevThreshVBR0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshVBRNEG[i], bps_ratio));
4620             hucVdencBrcInitDmem->DevThreshVBR0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_posMultVBR*pow(m_devThreshVBRPOS[i], bps_ratio));
4621         }
4622     }
4623 
4624     MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshP0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshP0, 4 * sizeof(int8_t));
4625     MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshB0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshB0, 4 * sizeof(int8_t));
4626     MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshI0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshI0, 4 * sizeof(int8_t));
4627 
4628     if (m_brcEnabled)
4629     {
4630         // initQPIP, initQPB values will be used for BRC in the future
4631         int32_t initQPIP = 0, initQPB = 0;
4632         ComputeVDEncInitQP(initQPIP, initQPB);
4633         hucVdencBrcInitDmem->InitQPIP_U8 = (uint8_t)initQPIP;
4634         hucVdencBrcInitDmem->InitQPB_U8 = (uint8_t)initQPB;
4635     }
4636     else
4637     {
4638         hucVdencBrcInitDmem->InitQPIP_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4639         hucVdencBrcInitDmem->InitQPB_U8  = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4640     }
4641 
4642     hucVdencBrcInitDmem->TopFrmSzThrForAdapt2Pass_U8 = 32;
4643     hucVdencBrcInitDmem->BotFrmSzThrForAdapt2Pass_U8 = 24;
4644 
4645     MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshP0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshP0, 7 * sizeof(uint8_t));
4646     MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshB0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshB0, 7 * sizeof(uint8_t));
4647     MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshI0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshI0, 7 * sizeof(uint8_t));
4648 
4649     if (m_vdencStreamInEnabled && m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled)
4650     {
4651         hucVdencBrcInitDmem->StreamInROIEnable_U8 = 1;
4652         hucVdencBrcInitDmem->StreamInSurfaceEnable_U8 = 1;
4653     }
4654 
4655     hucVdencBrcInitDmem->TopQPDeltaThrForAdapt2Pass_U8 = 2;
4656     hucVdencBrcInitDmem->BotQPDeltaThrForAdapt2Pass_U8 = 1;
4657 
4658     if ((m_hevcSeqParams->SlidingWindowSize != 0) && (m_hevcSeqParams->MaxBitRatePerSlidingWindow != 0))
4659     {
4660         hucVdencBrcInitDmem->SlidingWindow_Size_U32     = m_hevcSeqParams->SlidingWindowSize;
4661         hucVdencBrcInitDmem->SLIDINGWINDOW_MaxRateRatio = m_hevcSeqParams->MaxBitRatePerSlidingWindow * 100 / m_hevcSeqParams->TargetBitRate;
4662     }
4663     else
4664     {
4665         if (m_hevcSeqParams->FrameRate.Denominator == 0)
4666         {
4667             CODECHAL_ENCODE_ASSERTMESSAGE("FrameRate.Deminator is zero!");
4668             return MOS_STATUS_INVALID_PARAMETER;
4669         }
4670         uint32_t framerate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
4671         hucVdencBrcInitDmem->SlidingWindow_Size_U32 = MOS_MIN(framerate, 60);
4672         hucVdencBrcInitDmem->SLIDINGWINDOW_MaxRateRatio = 120;
4673     }
4674 
4675     // Tile Row based BRC
4676     if (m_enableTileReplay)
4677     {
4678         uint32_t shift                        = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
4679         uint32_t residual                     = (1 << shift) - 1;
4680         hucVdencBrcInitDmem->SlideWindowRC    = 0;  //Reserved for now
4681         hucVdencBrcInitDmem->MaxLogCUSize     = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4682         hucVdencBrcInitDmem->FrameWidthInLCU  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
4683         hucVdencBrcInitDmem->FrameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
4684     }
4685 
4686     // Long term reference
4687     hucVdencBrcInitDmem->LongTermRefEnable_U8  = true;
4688     hucVdencBrcInitDmem->LongTermRefMsdk_U8 = true;
4689     hucVdencBrcInitDmem->IsLowDelay_U8 = m_lowDelay;
4690 
4691     hucVdencBrcInitDmem->LookaheadDepth_U8 = m_lookaheadDepth;
4692 
4693     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
4694 
4695     return eStatus;
4696 }
4697 
SetConstDataHuCBrcUpdate()4698 MOS_STATUS CodechalVdencHevcStateG12::SetConstDataHuCBrcUpdate()
4699 {
4700     CODECHAL_ENCODE_FUNCTION_ENTER;
4701 
4702     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4703 
4704     MOS_LOCK_PARAMS lockFlagsWriteOnly;
4705     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4706     lockFlagsWriteOnly.WriteOnly = true;
4707 
4708     auto hucConstData = (PCODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G12)m_osInterface->pfnLockResource(
4709         m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
4710     CODECHAL_ENCODE_CHK_NULL_RETURN(hucConstData);
4711 
4712     MOS_SecureMemcpy(hucConstData->SLCSZ_THRDELTAI_U16, sizeof(m_hucConstantData), m_hucConstantData, sizeof(m_hucConstantData));
4713 
4714     MOS_SecureMemcpy(hucConstData->RDQPLambdaI, sizeof(m_rdQpLambdaI), m_rdQpLambdaI, sizeof(m_rdQpLambdaI));
4715     MOS_SecureMemcpy(hucConstData->RDQPLambdaP, sizeof(m_rdQpLambdaP), m_rdQpLambdaP, sizeof(m_rdQpLambdaP));
4716 
4717     if (m_hevcVisualQualityImprovement)
4718     {
4719         MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI_VQI, sizeof(m_sadQpLambdaI_VQI));
4720         MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode_VQI, sizeof(m_penaltyForIntraNonDC32x32PredMode_VQI));
4721     }
4722     else
4723     {
4724         MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI, sizeof(m_sadQpLambdaI));
4725         MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode));
4726     }
4727 
4728     MOS_SecureMemcpy(hucConstData->SADQPLambdaP, sizeof(m_sadQpLambdaP), m_sadQpLambdaP, sizeof(m_sadQpLambdaP));
4729 
4730     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
4731     {
4732         const int numEstrateThreshlds = 7;
4733 
4734         for (int i = 0; i < numEstrateThreshlds + 1; i++)
4735         {
4736             for (int j = 0; j < m_numDevThreshlds + 1; j++)
4737             {
4738                 hucConstData->FrmSzAdjTabI_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszI[j][i];
4739                 hucConstData->FrmSzAdjTabP_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszP[j][i];
4740                 hucConstData->FrmSzAdjTabB_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszB[j][i];
4741             }
4742         }
4743     }
4744 
4745     // ModeCosts depends on frame type
4746     if (m_pictureCodingType == I_TYPE)
4747     {
4748         MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsIFrame), m_hucModeCostsIFrame, sizeof(m_hucModeCostsIFrame));
4749     }
4750     else
4751     {
4752         MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsPbFrame), m_hucModeCostsPbFrame, sizeof(m_hucModeCostsPbFrame));
4753     }
4754 
4755     // starting location in batch buffer for each slice
4756     uint32_t baseLocation = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
4757     uint32_t currentLocation = baseLocation;
4758 
4759     auto slcData = m_slcData;
4760     // HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
4761     for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4762     {
4763         auto hevcSlcParams = &m_hevcSliceParams[slcCount];
4764         // HuC FW require unit in Bytes
4765         hucConstData->Slice[slcCount].SizeOfCMDs
4766             = (uint16_t)(m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount]);
4767 
4768         // HCP_WEIGHTOFFSET_STATE cmd
4769         if (m_hevcVdencWeightedPredEnabled)
4770         {
4771             // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
4772             if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4773             {
4774                 hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = (uint16_t)currentLocation;   // HCP_WEIGHTOFFSET_L0 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
4775                 currentLocation += m_hcpWeightOffsetStateCmdSize;
4776             }
4777 
4778             // 2nd HCP_WEIGHTOFFSET_STATE cmd - B
4779             if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4780             {
4781                 hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L1 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
4782                 currentLocation += m_hcpWeightOffsetStateCmdSize;
4783             }
4784         }
4785         else
4786         {
4787             // 0xFFFF means unavailable in SLB
4788             hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = 0xFFFF;
4789             hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = 0xFFFF;
4790         }
4791 
4792         // HCP_SLICE_STATE cmd
4793         hucConstData->Slice[slcCount].SliceState_StartInBytes = (uint16_t)currentLocation;  // HCP_WEIGHTOFFSET is not needed
4794         currentLocation += m_hcpSliceStateCmdSize;
4795 
4796         // VDENC_WEIGHT_OFFSETS_STATE cmd
4797         hucConstData->Slice[slcCount].VdencWeightOffset_StartInBytes                      // VdencWeightOffset cmd is the last one expect BatchBufferEnd cmd
4798             = (uint16_t)(baseLocation + hucConstData->Slice[slcCount].SizeOfCMDs - m_vdencWeightOffsetStateCmdSize - m_miBatchBufferEndCmdSize - ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4);
4799 
4800         // logic from PakInsertObject cmd
4801         uint32_t bitSize         = (m_hevcSeqParams->SliceSizeControl) ? (hevcSlcParams->BitLengthSliceHeaderStartingPortion) : slcData[slcCount].BitSize;  // 40 for HEVC VDEnc Dynamic Slice
4802         uint32_t byteSize = (bitSize + 7) >> 3;
4803         uint32_t sliceHeaderSizeInBytes = (bitSize + 7) >> 3;
4804         // 1st PakInsertObject cmd with AU, SPS, PPS headers only exists for the first slice
4805         if (slcCount == 0)
4806         {
4807             // assumes that there is no 3rd PakInsertObject cmd for SSC
4808             currentLocation += m_1stPakInsertObjectCmdSize;
4809         }
4810 
4811         hucConstData->Slice[slcCount].SliceHeaderPIO_StartInBytes = (uint16_t)currentLocation;
4812 
4813         // HuC FW requires true slice header size in bits without byte alignment
4814         hucConstData->Slice[slcCount].SliceHeader_SizeInBits = (uint16_t)(sliceHeaderSizeInBytes * 8);
4815         if (!IsFirstPass())
4816         {
4817             PBSBuffer bsBuffer = &m_bsBuffer;
4818             CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer);
4819             CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer->pBase);
4820             uint8_t *sliceHeaderLastByte = (uint8_t*)(bsBuffer->pBase + slcData[slcCount].SliceOffset + sliceHeaderSizeInBytes - 1);
4821             for (auto i = 0; i < 8; i++)
4822             {
4823                 uint8_t mask = 1 << i;
4824                 if (*sliceHeaderLastByte & mask)
4825                 {
4826                     hucConstData->Slice[slcCount].SliceHeader_SizeInBits -= (i + 1);
4827                     break;
4828                 }
4829             }
4830         }
4831 
4832         if (m_hevcVdencWeightedPredEnabled)
4833         {
4834             hucConstData->Slice[slcCount].WeightTable_StartInBits = (uint16_t)hevcSlcParams->PredWeightTableBitOffset;
4835             hucConstData->Slice[slcCount].WeightTable_EndInBits = (uint16_t)(hevcSlcParams->PredWeightTableBitOffset + (hevcSlcParams->PredWeightTableBitLength));
4836         }
4837         else
4838         {
4839             // number of bits from beginning of slice header, 0xffff means not awailable
4840             hucConstData->Slice[slcCount].WeightTable_StartInBits = 0xFFFF;
4841             hucConstData->Slice[slcCount].WeightTable_EndInBits = 0xFFFF;
4842         }
4843 
4844         baseLocation += hucConstData->Slice[slcCount].SizeOfCMDs;
4845         currentLocation = baseLocation;
4846     }
4847 
4848     if (m_lookaheadDepth > 0)
4849     {
4850         hucConstData->UPD_LA_TargetFulness_U32 = m_targetBufferFulness;
4851 
4852         uint8_t QpStrength = (uint8_t)(m_hevcPicParams->QpModulationStrength + (m_hevcPicParams->QpModulationStrength >> 1));
4853         if (!m_initDeltaQP)
4854         {
4855             hucConstData->UPD_deltaQP = (m_prevQpModulationStrength + QpStrength + 1) >> 1;
4856         }
4857         else
4858         {
4859             hucConstData->UPD_deltaQP = QpStrength;
4860 
4861             if (IsLastPass())
4862             {
4863                 m_initDeltaQP = false;
4864             }
4865         }
4866 
4867         m_prevQpModulationStrength = hucConstData->UPD_deltaQP;
4868     }
4869 
4870     hucConstData->UPD_TR_TargetSize_U32 = m_hevcPicParams->TargetFrameSize << 3;// byte to bit
4871     hucConstData->UPD_TCBRC_SCENARIO_U8 = m_tcbrcQualityBoost;
4872 
4873     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]);
4874 
4875     return eStatus;
4876 }
4877 
SetDmemHuCBrcUpdate()4878 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCBrcUpdate()
4879 {
4880     CODECHAL_ENCODE_FUNCTION_ENTER;
4881 
4882     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4883 
4884     MOS_LOCK_PARAMS lockFlagsWriteOnly;
4885     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4886     lockFlagsWriteOnly.WriteOnly = true;
4887     uint32_t currentPass = m_enableTileReplay ? m_CurrentPassForOverAll : GetCurrentPass();
4888 
4889     // Program update DMEM
4890     auto hucVdencBrcUpdateDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12)m_osInterface->pfnLockResource(
4891         m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
4892     CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcUpdateDmem);
4893     MOS_ZeroMemory(hucVdencBrcUpdateDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12));
4894 
4895     hucVdencBrcUpdateDmem->TARGETSIZE_U32 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)? m_hevcSeqParams->InitVBVBufferFullnessInBit :
4896                                             MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
4897     hucVdencBrcUpdateDmem->FrameID_U32 = m_storeData;    // frame number
4898     MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjFrame_U16, 4 * sizeof(uint16_t), (void*)m_startGAdjFrame, 4 * sizeof(uint16_t));
4899     hucVdencBrcUpdateDmem->TargetSliceSize_U16           = (uint16_t)m_hevcPicParams->MaxSliceSizeInBytes;
4900     auto slbSliceSize = (m_hwInterface->m_vdenc2ndLevelBatchBufferSize - m_hwInterface->m_vdencBatchBuffer1stGroupSize -
4901         m_hwInterface->m_vdencBatchBuffer2ndGroupSize) / ENCODE_HEVC_VDENC_NUM_MAX_SLICES;
4902     hucVdencBrcUpdateDmem->SLB_Data_SizeInBytes = (uint16_t)(slbSliceSize * m_numSlices +
4903         m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
4904     hucVdencBrcUpdateDmem->PIPE_MODE_SELECT_StartInBytes = 0xFFFF;    // HuC need not need to modify the pipe mode select command in Gen11+
4905     hucVdencBrcUpdateDmem->CMD1_StartInBytes = (uint16_t)m_hwInterface->m_vdencBatchBuffer1stGroupSize;
4906     hucVdencBrcUpdateDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes;
4907     hucVdencBrcUpdateDmem->CMD2_StartInBytes = (uint16_t)m_cmd2StartInBytes;
4908 
4909     if (m_prevStoreData != m_storeData)
4910     {
4911         m_prevStoreData = m_storeData;
4912 
4913         int32_t oldestIdx = -1;
4914         int32_t selectedSlot = -1;
4915         uint32_t oldestAge = 0;
4916         for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++)
4917         {
4918             if (slotInfo[i].isUsed == true && slotInfo[i].isRef)
4919             {
4920                 slotInfo[i].age++;
4921                 if (slotInfo[i].age >= oldestAge)
4922                 {
4923                     oldestAge = slotInfo[i].age;
4924                     oldestIdx = i;
4925                 }
4926             }
4927             if ((selectedSlot == -1) && (slotInfo[i].isUsed == false || !slotInfo[i].isRef))
4928             {
4929                 selectedSlot = i;
4930             }
4931         }
4932 
4933         if (selectedSlot == -1)
4934         {
4935             selectedSlot = oldestIdx;
4936         }
4937 
4938         if (selectedSlot == -1)
4939         {
4940             CODECHAL_ENCODE_ASSERTMESSAGE("No valid ref slot index");
4941             return MOS_STATUS_INVALID_PARAMETER;
4942         }
4943 
4944         slotInfo[selectedSlot].age = 0;
4945         slotInfo[selectedSlot].poc = m_hevcPicParams->CurrPicOrderCnt;
4946         slotInfo[selectedSlot].isUsed = true;
4947         slotInfo[selectedSlot].isRef = m_hevcPicParams->bUsedAsRef;
4948 
4949         m_curPicSlot = selectedSlot;
4950     }
4951 
4952     hucVdencBrcUpdateDmem->Current_Data_Offset = m_curPicSlot * m_weightHistSize;
4953 
4954     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
4955     {
4956         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx];
4957         auto refPOC = m_hevcPicParams->RefFramePOCList[refPic.FrameIdx];
4958         for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++)
4959         {
4960             if (slotInfo[i].poc == refPOC)
4961             {
4962                 hucVdencBrcUpdateDmem->Ref_Data_Offset[refIdx] = i * m_weightHistSize;
4963                 break;
4964             }
4965         }
4966     }
4967 
4968     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
4969     {
4970         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx];
4971         auto refPOC = m_hevcPicParams->RefFramePOCList[refPic.FrameIdx];
4972         for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++)
4973         {
4974             if (slotInfo[i].poc == refPOC)
4975             {
4976                 hucVdencBrcUpdateDmem->Ref_Data_Offset[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] = i * m_weightHistSize;
4977                 break;
4978             }
4979         }
4980     }
4981 
4982     hucVdencBrcUpdateDmem->MaxNumSliceAllowed_U16 = (uint16_t)GetMaxAllowedSlices(m_hevcSeqParams->Level);
4983 
4984     if (m_FrameLevelBRCForTileRow)
4985     {
4986         hucVdencBrcUpdateDmem->OpMode_U8 = 0x4;
4987     }
4988     else if (m_TileRowLevelBRC)
4989     {
4990         hucVdencBrcUpdateDmem->OpMode_U8 = 0x8;
4991     }
4992     else
4993     {
4994         hucVdencBrcUpdateDmem->OpMode_U8         // 1: BRC (including ACQP), 2: Weighted prediction (should not be enabled in first pass)
4995             = (m_hevcVdencWeightedPredEnabled && m_hevcPicParams->bEnableGPUWeightedPrediction && !IsFirstPass()) ? 3 : 1;    // 01: BRC, 10: WP never used,  11: BRC + WP
4996     }
4997 
4998     bool bAllowedPyramid = m_hevcSeqParams->GopRefDist != 3;
4999 
5000     if (m_pictureCodingType == I_TYPE)
5001     {
5002         hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_I;
5003     }
5004     else if (m_hevcSeqParams->HierarchicalFlag && bAllowedPyramid)
5005     {
5006         if (m_hevcPicParams->HierarchLevelPlus1 > 0)
5007         {
5008             std::map<int, HEVC_BRC_FRAME_TYPE> hierchLevelPlus1_to_brclevel{
5009             {1, HEVC_BRC_FRAME_TYPE_P_OR_LB},
5010             {2, HEVC_BRC_FRAME_TYPE_B},
5011             {3, HEVC_BRC_FRAME_TYPE_B1},
5012             {4, HEVC_BRC_FRAME_TYPE_B2}};
5013             hucVdencBrcUpdateDmem->CurrentFrameType_U8 = hierchLevelPlus1_to_brclevel.count(m_hevcPicParams->HierarchLevelPlus1) ? hierchLevelPlus1_to_brclevel[m_hevcPicParams->HierarchLevelPlus1] : HEVC_BRC_FRAME_TYPE_INVALID;
5014             //Invalid HierarchLevelPlus1 or LBD frames at level 3 eror check.
5015             if ((hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_INVALID) ||
5016                 (m_hevcSeqParams->LowDelayMode && hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_B2))
5017             {
5018                 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC_BRC_FRAME_TYPE_INVALID or LBD picture doesn't support Level 4\n");
5019                 return MOS_STATUS_INVALID_PARAMETER;
5020             }
5021         }
5022         else if(!m_hevcSeqParams->LowDelayMode) //RA
5023         {
5024             //if L0/L1 both points to previous frame, then its LBD otherwise its is level 1 RA B.
5025             auto B_or_LDB_brclevel = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
5026             std::map<int, HEVC_BRC_FRAME_TYPE> codingtype_to_brclevel{
5027             {P_TYPE, HEVC_BRC_FRAME_TYPE_P_OR_LB},
5028             {B_TYPE, B_or_LDB_brclevel},
5029             {B1_TYPE, HEVC_BRC_FRAME_TYPE_B1},
5030             {B2_TYPE, HEVC_BRC_FRAME_TYPE_B2}};
5031              hucVdencBrcUpdateDmem->CurrentFrameType_U8 = codingtype_to_brclevel.count(m_pictureCodingType) ? codingtype_to_brclevel[m_pictureCodingType] : HEVC_BRC_FRAME_TYPE_INVALID;
5032             //Invalid CodingType.
5033             if (hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_INVALID)
5034             {
5035                 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid CodingType\n");
5036                 return MOS_STATUS_INVALID_PARAMETER;
5037             }
5038         }
5039         else //LDB
5040         {
5041             hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_P_OR_LB; //No Hierarchical info for LDB, treated as flat case
5042         }
5043     }
5044     else // FlatB or LDB
5045     {
5046         hucVdencBrcUpdateDmem->CurrentFrameType_U8 = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
5047     }
5048 
5049     // Num_Ref_L1 should be always same as Num_Ref_L0
5050     hucVdencBrcUpdateDmem->Num_Ref_L0_U8 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
5051     hucVdencBrcUpdateDmem->Num_Ref_L1_U8 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;
5052     hucVdencBrcUpdateDmem->Num_Slices    = (uint8_t)m_hevcPicParams->NumSlices;
5053 
5054     // CQP_QPValue_U8 setting is needed since ACQP is also part of ICQ
5055     hucVdencBrcUpdateDmem->CQP_QPValue_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
5056     hucVdencBrcUpdateDmem->CQP_FracQP_U8 = 0;
5057     if (m_hevcPicParams->BRCPrecision == 1)
5058     {
5059         hucVdencBrcUpdateDmem->MaxNumPass_U8 = 1;
5060     }
5061     else
5062     {
5063         hucVdencBrcUpdateDmem->MaxNumPass_U8 = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
5064     }
5065 
5066     MOS_SecureMemcpy(hucVdencBrcUpdateDmem->gRateRatioThreshold_U8, 7 * sizeof(uint8_t), (void*)m_rateRatioThreshold, 7 * sizeof(uint8_t));
5067     MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjMult_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjMult, 5 * sizeof(uint8_t));
5068     MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjDiv_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjDiv, 5 * sizeof(uint8_t));
5069     MOS_SecureMemcpy(hucVdencBrcUpdateDmem->gRateRatioThresholdQP_U8, 8 * sizeof(uint8_t), (void*)m_rateRatioThresholdQP, 8 * sizeof(uint8_t));
5070 
5071     hucVdencBrcUpdateDmem->IPAverageCoeff_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) ? 0 : 64;
5072     hucVdencBrcUpdateDmem->CurrentPass_U8 = (uint8_t)currentPass;
5073 
5074     if ((m_hevcVdencAcqpEnabled && m_hevcSeqParams->QpAdjustment) || (m_brcEnabled && (m_hevcSeqParams->MBBRC != 2)))
5075     {
5076         hucVdencBrcUpdateDmem->DeltaQPForSadZone0_S8 = -1;
5077         hucVdencBrcUpdateDmem->DeltaQPForSadZone1_S8 = 0;
5078         hucVdencBrcUpdateDmem->DeltaQPForSadZone2_S8 = 1;
5079         hucVdencBrcUpdateDmem->DeltaQPForSadZone3_S8 = 2;
5080         hucVdencBrcUpdateDmem->DeltaQPForMvZero_S8   = 3;
5081         hucVdencBrcUpdateDmem->DeltaQPForMvZone0_S8  = -2;
5082         hucVdencBrcUpdateDmem->DeltaQPForMvZone1_S8  = 0;
5083         hucVdencBrcUpdateDmem->DeltaQPForMvZone2_S8  = 2;
5084     }
5085 
5086     if (m_hevcVdencWeightedPredEnabled)
5087     {
5088         hucVdencBrcUpdateDmem->LumaLog2WeightDenom_S8 = 6;
5089         hucVdencBrcUpdateDmem->ChromaLog2WeightDenom_S8 = 6;
5090     }
5091 
5092     // chroma weights are not confirmed to be supported from HW team yet
5093     hucVdencBrcUpdateDmem->DisabledFeature_U8 = 0; // bit mask, 1 (bit0): disable chroma weight setting
5094 
5095     hucVdencBrcUpdateDmem->SlidingWindow_Enable_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
5096     hucVdencBrcUpdateDmem->LOG_LCU_Size_U8 = 6;
5097     hucVdencBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8    = 4;
5098     hucVdencBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8    = -5;
5099     hucVdencBrcUpdateDmem->SceneChgPrevIntraPctThreshold_U8 = 96;
5100     hucVdencBrcUpdateDmem->SceneChgCurIntraPctThreshold_U8  = 192;
5101 
5102     // SCC is in conflict with PAK only pass
5103     if (m_enableSCC)
5104     {
5105         hucVdencBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8 = 0;
5106         hucVdencBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8 = 0;
5107     }
5108 
5109     // reset skip frame statistics
5110     m_numSkipFrames = 0;
5111     m_sizeSkipFrames = 0;
5112 
5113     // For tile row based BRC
5114     if (m_TileRowLevelBRC)
5115     {
5116         hucVdencBrcUpdateDmem->MaxNumTileHuCCallMinus1 = m_hevcPicParams->num_tile_rows_minus1;
5117         hucVdencBrcUpdateDmem->TileHucCallIndex        = (uint8_t)m_CurrentTileRow;
5118         hucVdencBrcUpdateDmem->TileHuCCallPassIndex    = m_CurrentPassForTileReplay + 1;
5119         hucVdencBrcUpdateDmem->TileHuCCallPassMax      = m_NumPassesForTileReplay;
5120 
5121         // Need change App to pass real max bit rate rather than to enlarge it with 1000
5122         if (m_hevcSeqParams->FrameRate.Numerator)
5123         {
5124             hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS *
5125                 m_hevcSeqParams->FrameRate.Denominator + (m_hevcSeqParams->FrameRate.Numerator >> 1)) /
5126                 m_hevcSeqParams->FrameRate.Numerator);
5127         }
5128         else
5129         {
5130             hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS + 15) / 30);
5131         }
5132 
5133         uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5134         uint32_t startIdx       = m_CurrentTileRow * numTileColumns;
5135         uint32_t endIdx         = startIdx + numTileColumns - 1;
5136         uint32_t LCUsInTile     = 0;
5137 
5138         for (uint32_t idx = 0; idx < numTileColumns; idx ++)
5139         {
5140             LCUsInTile += m_hevcPicParams->tile_row_height[m_CurrentTileRow] * m_hevcPicParams->tile_column_width[idx];
5141         }
5142 
5143         hucVdencBrcUpdateDmem->StartTileIdx            = (uint8_t)startIdx;
5144         hucVdencBrcUpdateDmem->EndTileIdx              = (uint8_t)endIdx;
5145         hucVdencBrcUpdateDmem->TileSizeInLCU           = (uint16_t)LCUsInTile;
5146     }
5147     else if (m_FrameLevelBRCForTileRow)
5148     {
5149         hucVdencBrcUpdateDmem->MaxNumTileHuCCallMinus1 = m_hevcPicParams->num_tile_rows_minus1;
5150         hucVdencBrcUpdateDmem->TileHucCallIndex        = 0;
5151         hucVdencBrcUpdateDmem->TileHuCCallPassIndex    = 0;
5152         hucVdencBrcUpdateDmem->TileHuCCallPassMax      = m_NumPassesForTileReplay;
5153 
5154         // Need change App to pass real max bit rate rather than to enlarge it with 1000
5155         if (m_hevcSeqParams->FrameRate.Numerator)
5156         {
5157             hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS *
5158                 m_hevcSeqParams->FrameRate.Denominator + (m_hevcSeqParams->FrameRate.Numerator >> 1)) /
5159                 m_hevcSeqParams->FrameRate.Numerator);
5160         }
5161         else
5162         {
5163             hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS + 15) / 30);
5164         }
5165     }
5166 
5167     // Long term reference
5168     hucVdencBrcUpdateDmem->IsLongTermRef = CodecHal_PictureIsLongTermRef(m_currReconstructedPic);
5169     hucVdencBrcUpdateDmem->UPD_CQMEnabled_U8 = m_hevcSeqParams->scaling_list_enable_flag || m_hevcPicParams->scaling_list_data_present_flag;
5170 
5171     if (m_lookaheadDepth > 0)
5172     {
5173         hucVdencBrcUpdateDmem->EnableLookAhead = 1;
5174     }
5175     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
5176 
5177     return eStatus;
5178 }
5179 
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)5180 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
5181 {
5182     CODECHAL_ENCODE_FUNCTION_ENTER;
5183 
5184     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5185 
5186     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams));
5187 
5188     // With multiple tiles, ensure that HuC BRC kernel is fed with vdenc frame level statistics from HuC PAK Int kernel
5189     // Applicable for scalable/ non-scalable mode
5190     if (m_hevcPicParams->tiles_enabled_flag)
5191     {
5192         virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1  VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
5193         virtualAddrParams->regionParams[1].dwOffset   = m_hevcFrameStatsOffset.uiVdencStatistics;
5194     }
5195 
5196     if (m_numPipe > 1)
5197     {
5198         virtualAddrParams->regionParams[2].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 2  PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
5199         virtualAddrParams->regionParams[2].dwOffset   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
5200         virtualAddrParams->regionParams[7].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 7  Slice Stat Streamout (Input)
5201         virtualAddrParams->regionParams[7].dwOffset   = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
5202         // In scalable-mode, use PAK Integration kernel output to get bistream size
5203         virtualAddrParams->regionParams[8].presRegion   = &m_resBrcDataBuffer;
5204     }
5205 
5206     // Tile reset case, use previous frame BRC data
5207     if ((m_numPipe != m_numPipePre) && IsFirstPass())
5208     {
5209         if (m_numPipePre > 1)
5210         {
5211             virtualAddrParams->regionParams[8].presRegion   = &m_resBrcDataBuffer;
5212         }
5213         else
5214         {
5215             virtualAddrParams->regionParams[8].presRegion   = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
5216         }
5217     }
5218 
5219     return eStatus;
5220 }
5221 
SetRegionsHuCTileRowBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)5222 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCTileRowBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
5223 {
5224     CODECHAL_ENCODE_FUNCTION_ENTER;
5225 
5226     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5227 
5228     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams));
5229 
5230     // For tile replay, the tile based statistics is directly passed to HUC kernel
5231     virtualAddrParams->regionParams[1].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 1 � VDEnc Statistics Buffer (Input)
5232     virtualAddrParams->regionParams[1].dwOffset   = m_hevcTileStatsOffset.uiVdencStatistics;
5233 
5234     virtualAddrParams->regionParams[2].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 2 � PAK Statistics Buffer (Input)
5235     virtualAddrParams->regionParams[2].dwOffset   = m_hevcTileStatsOffset.uiHevcPakStatistics;
5236 
5237     virtualAddrParams->regionParams[7].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 7 � Slice Stat Streamout (Input)
5238     virtualAddrParams->regionParams[7].dwOffset   = m_hevcTileStatsOffset.uiHevcSliceStreamout;
5239 
5240     virtualAddrParams->regionParams[12].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;             // Region 12 � Tile encoded information (Input)
5241 
5242     return eStatus;
5243 }
5244 
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)5245 void CodechalVdencHevcStateG12::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
5246 {
5247     CODECHAL_ENCODE_FUNCTION_ENTER;
5248 
5249     CodechalVdencHevcState::SetHcpSliceStateCommonParams(sliceStateParams);
5250 
5251     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceStateParams).dwNumPipe = m_numPipe;
5252 
5253     if (m_enableSCC)
5254     {
5255         static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceStateParams).ucRecNotFilteredID = m_slotForRecNotFiltered;
5256     }
5257 }
5258 
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,bool lastSliceInTile,uint32_t idx)5259 void CodechalVdencHevcStateG12::SetHcpSliceStateParams(
5260     MHW_VDBOX_HEVC_SLICE_STATE&           sliceState,
5261     PCODEC_ENCODER_SLCDATA                slcData,
5262     uint16_t                              slcCount,
5263     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,
5264     bool                                  lastSliceInTile,
5265     uint32_t                              idx)
5266 {
5267     CODECHAL_ENCODE_FUNCTION_ENTER;
5268 
5269     CodechalEncodeHevcBase::SetHcpSliceStateParams(sliceState, slcData, slcCount);
5270 
5271     sliceState.bLastSliceInTile = lastSliceInTile ? true : false;
5272     sliceState.bLastSliceInTileColumn = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false;
5273     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12&>(sliceState).pTileCodingParams = tileCodingParams + idx;
5274     static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12&>(sliceState).dwTileID = idx;
5275 
5276     // update pass status
5277     if (m_enableTileReplay && m_FrameLevelBRCForTileRow)
5278     {
5279         sliceState.bFirstPass = true;
5280         sliceState.bLastPass  = false;
5281     }
5282     else if (m_enableTileReplay && m_TileRowLevelBRC)
5283     {
5284         sliceState.bFirstPass = IsFirstPassForTileReplay();
5285         sliceState.bLastPass  = IsLastPassForTileReplay();
5286     }
5287 }
5288 
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)5289 void CodechalVdencHevcStateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
5290 {
5291     CODECHAL_ENCODE_FUNCTION_ENTER;
5292 
5293     CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
5294 
5295     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(vdboxPipeModeSelectParams);
5296 
5297     if (m_numPipe > 1)
5298     {
5299         // Running in the multiple VDBOX mode
5300         if (IsFirstPipe())
5301         {
5302             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
5303         }
5304         else if (IsLastPipe())
5305         {
5306             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
5307         }
5308         else
5309         {
5310             pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
5311         }
5312         pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
5313     }
5314     else
5315     {
5316         pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
5317         pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
5318     }
5319 
5320     // In single pipe mode, if TileBasedReplayMode is enabled, the bit stream for each tile will not be continuous
5321     if (m_hevcPicParams->tiles_enabled_flag)
5322     {
5323         pipeModeSelectParams.bTileBasedReplayMode = m_enableTileReplay;
5324     }
5325     else
5326     {
5327         pipeModeSelectParams.bTileBasedReplayMode = 0;
5328     }
5329 
5330     // To enable VDENC/PAK statistics stream out for BRC only
5331     // Is stream out needed for ACQP? check this out!
5332     pipeModeSelectParams.bBRCEnabled = m_hevcVdencAcqpEnabled || m_vdencBrcEnabled;
5333 }
5334 
SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)5335 void CodechalVdencHevcStateG12::SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
5336 {
5337     CODECHAL_ENCODE_FUNCTION_ENTER;
5338 
5339     CodechalVdencHevcState::SetVdencPipeModeSelectParams(vdboxPipeModeSelectParams);
5340 
5341     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(vdboxPipeModeSelectParams);
5342 
5343     // Enable RGB encoding
5344     pipeModeSelectParams.bRGBEncodingMode  = m_RGBEncodingEnable;
5345 
5346     // Capture mode enable
5347     pipeModeSelectParams.bWirelessEncodeEnabled = m_CaptureModeEnable;
5348     pipeModeSelectParams.ucWirelessSessionId    = 0;
5349 
5350     // Set random access flag
5351     pipeModeSelectParams.bIsRandomAccess        = !m_lowDelay;
5352 
5353     // Set lookahead pass flag
5354     pipeModeSelectParams.bLookaheadPass         = m_lookaheadPass;
5355 
5356 #ifdef _ENCODE_VDENC_RESERVED
5357     if (m_rsvdState)
5358     {
5359         m_rsvdState->SetVdencPipeModeSelectParams(pipeModeSelectParams);
5360     }
5361 #endif
5362 
5363     if (m_enableSCC && (m_hevcPicParams->pps_curr_pic_ref_enabled_flag || m_hevcSeqParams->palette_mode_enabled_flag))
5364     {
5365         pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = false;
5366     }
5367 }
5368 
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)5369 void CodechalVdencHevcStateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
5370 {
5371     CODECHAL_ENCODE_FUNCTION_ENTER;
5372 
5373     CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
5374 
5375     //set MMC flag
5376     if (m_mmcState->IsMmcEnabled())
5377     {
5378         pipeBufAddrParams.bMmcEnabled = true;
5379     }
5380 
5381     PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
5382     if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
5383     {
5384         pipeBufAddrParams.presLcuBaseAddressBuffer     = &tileStatisticsBuffer->sResource;
5385         pipeBufAddrParams.dwLcuStreamOutOffset         = m_hevcTileStatsOffset.uiHevcSliceStreamout;
5386         pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
5387         pipeBufAddrParams.dwFrameStatStreamOutOffset   = m_hevcTileStatsOffset.uiHevcPakStatistics;
5388     }
5389 
5390     // SAO Row Store is GEN12 specific
5391     pipeBufAddrParams.presSaoRowStoreBuffer = &m_vdencSAORowStoreBuffer;
5392 
5393     // Set up the recon not filtered surface for IBC
5394     if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
5395     {
5396         // I frame is much simpler
5397         if (m_pictureCodingType == I_TYPE)
5398         {
5399             pipeBufAddrParams.presReferences[0] = &m_vdencRecNotFilteredBuffer;
5400             m_slotForRecNotFiltered             = 0;
5401         }
5402         // B frame
5403         else
5404         {
5405             unsigned int i;
5406             // Find one available slot
5407             for (i = 0; i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC; i++)
5408             {
5409                 if (pipeBufAddrParams.presReferences[i] == nullptr)
5410                 {
5411                     break;
5412                 }
5413             }
5414 
5415             CODECHAL_ENCODE_ASSERT(i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC);
5416 
5417             //record the slot for HCP_REF_IDX_STATE
5418             m_slotForRecNotFiltered             = (unsigned char)i;
5419             pipeBufAddrParams.presReferences[i] = &m_vdencRecNotFilteredBuffer;
5420         }
5421     }
5422 }
5423 
SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE & picStateParams)5424 void CodechalVdencHevcStateG12::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE& picStateParams)
5425 {
5426     CODECHAL_ENCODE_FUNCTION_ENTER;
5427 
5428     CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams);
5429     if (m_enableSCC)
5430     {
5431         MHW_VDBOX_HEVC_PIC_STATE_G12& picStateParamsGen12 = dynamic_cast<MHW_VDBOX_HEVC_PIC_STATE_G12&>(picStateParams);
5432         picStateParamsGen12.ucRecNotFilteredID  = m_slotForRecNotFiltered;
5433         picStateParamsGen12.IBCControl = m_enableLBCOnly ? SCC_IBC_CONTROL_IBC_ONLY_LBC_G12 : SCC_IBC_CONTROL_IBC_ENABLED_TBCLBC_G12;
5434     }
5435 }
5436 
AddHcpRefIdxCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)5437 MOS_STATUS CodechalVdencHevcStateG12::AddHcpRefIdxCmd(
5438     PMOS_COMMAND_BUFFER cmdBuffer,
5439     PMHW_BATCH_BUFFER batchBuffer,
5440     PMHW_VDBOX_HEVC_SLICE_STATE params)
5441 {
5442     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5443 
5444     CODECHAL_ENCODE_FUNCTION_ENTER;
5445 
5446     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
5447     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
5448     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
5449 
5450     if (cmdBuffer == nullptr && batchBuffer == nullptr)
5451     {
5452         CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
5453         return MOS_STATUS_NULL_POINTER;
5454     }
5455 
5456     PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
5457     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
5458 
5459     if ((hevcPicParams->pps_curr_pic_ref_enabled_flag) || (hevcSlcParams->slice_type != CODECHAL_ENCODE_HEVC_I_SLICE))
5460     {
5461         MHW_VDBOX_HEVC_REF_IDX_PARAMS_G12 refIdxParams;
5462 
5463         refIdxParams.CurrPic = hevcPicParams->CurrReconstructedPic;
5464         refIdxParams.isEncode = true;
5465         refIdxParams.ucList = LIST_0;
5466         refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l0_active_minus1 + 1;
5467         eStatus = MOS_SecureMemcpy(&refIdxParams.RefPicList, sizeof(refIdxParams.RefPicList),
5468             &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList));
5469         if (eStatus != MOS_STATUS_SUCCESS)
5470         {
5471             CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
5472             return eStatus;
5473         }
5474 
5475         refIdxParams.hevcRefList = (void**)m_refList;
5476         refIdxParams.poc_curr_pic = hevcPicParams->CurrPicOrderCnt;
5477         for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
5478         {
5479             refIdxParams.poc_list[i] = hevcPicParams->RefFramePOCList[i];
5480         }
5481 
5482         refIdxParams.pRefIdxMapping = params->pRefIdxMapping;
5483         refIdxParams.RefFieldPicFlag = 0; // there is no interlaced support in encoder
5484         refIdxParams.RefBottomFieldFlag = 0; // there is no interlaced support in encoder
5485 
5486     if (m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
5487     {
5488         refIdxParams.bIBCEnabled = true;
5489         refIdxParams.ucRecNotFilteredID = m_slotForRecNotFiltered;
5490 
5491         if ((m_hevcPicParams->CodingType == I_TYPE) && (m_hevcSliceParams->slice_type == MhwVdboxHcpInterface::hevcSliceP))
5492         {
5493             refIdxParams.ucNumRefForList = 0;
5494         }
5495     }
5496 
5497         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
5498 
5499         if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
5500         {
5501             refIdxParams.ucList = LIST_1;
5502             refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l1_active_minus1 + 1;
5503             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
5504         }
5505     }
5506 
5507     return eStatus;
5508 }
5509 
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)5510 void CodechalVdencHevcStateG12::SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
5511 {
5512     CODECHAL_ENCODE_FUNCTION_ENTER;
5513 
5514     CodechalVdencHevcState::SetVdencPipeBufAddrParams(pipeBufAddrParams);
5515 
5516     PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer    = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
5517     if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource))
5518     {
5519         pipeBufAddrParams.presVdencStreamOutBuffer = &tileStatisticsBuffer->sResource;
5520         pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_hevcTileStatsOffset.uiVdencStatistics;
5521     }
5522 
5523     // Set up the recon not filtered surface for IBC
5524     if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
5525     {
5526         // I frame is much simpler
5527         if (m_pictureCodingType == I_TYPE)
5528         {
5529             pipeBufAddrParams.presVdencReferences[0] = &m_vdencRecNotFilteredBuffer;
5530         }
5531         // LDB
5532         else
5533         {
5534             unsigned int i;
5535 
5536             // Find one available slot
5537             for (i = 0; i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC; i++)
5538             {
5539                 if (pipeBufAddrParams.presVdencReferences[i] == nullptr)
5540                 {
5541                     break;
5542                 }
5543             }
5544 
5545             CODECHAL_ENCODE_ASSERT(i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC);
5546             if (i != 0)
5547             {
5548                 pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 += 1;
5549             }
5550             pipeBufAddrParams.presVdencReferences[i] = &m_vdencRecNotFilteredBuffer;
5551         }
5552     }
5553 
5554     pipeBufAddrParams.presVdencTileRowStoreBuffer = &m_vdencTileRowStoreBuffer;
5555     pipeBufAddrParams.presVdencCumulativeCuCountStreamoutSurface = &m_vdencCumulativeCuCountStreamoutSurface;
5556     pipeBufAddrParams.isLowDelayB = m_lowDelay;
5557 }
5558 
SetKernelParams(EncOperation operation,MHW_KERNEL_PARAM * kernelParams)5559 MOS_STATUS CodechalVdencHevcStateG12::SetKernelParams(
5560     EncOperation     operation,
5561     MHW_KERNEL_PARAM *kernelParams)
5562 {
5563     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5564 
5565     CODECHAL_ENCODE_FUNCTION_ENTER;
5566 
5567     CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
5568 
5569     auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
5570 
5571     kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
5572     kernelParams->iIdCount = 1;
5573 
5574     switch (operation)
5575     {
5576     case VDENC_ME_P:
5577     case VDENC_ME_B:
5578     case VDENC_STREAMIN:
5579     case VDENC_STREAMIN_HEVC:
5580     case VDENC_STREAMIN_HEVC_RAB:
5581         kernelParams->iBTCount = CODECHAL_VDENC_HME_END_G12 - CODECHAL_VDENC_HME_BEGIN_G12;
5582         kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_HEVC_VP9_VDENC_ME_CURBE_G12), (size_t)curbeAlignment);
5583         kernelParams->iBlockWidth = 32;
5584         kernelParams->iBlockHeight = 32;
5585         break;
5586     default:
5587         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
5588         eStatus = MOS_STATUS_INVALID_PARAMETER;
5589     }
5590 
5591     return eStatus;
5592 }
5593 
SetBindingTable(EncOperation operation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)5594 MOS_STATUS CodechalVdencHevcStateG12::SetBindingTable(
5595     EncOperation operation,
5596     PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)
5597 {
5598     CODECHAL_ENCODE_FUNCTION_ENTER;
5599 
5600     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5601 
5602     CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
5603 
5604     MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
5605 
5606     switch (operation)
5607     {
5608     case VDENC_ME_P:
5609     case VDENC_ME_B:
5610     case VDENC_STREAMIN:
5611     case VDENC_STREAMIN_HEVC:
5612     case VDENC_STREAMIN_HEVC_RAB:
5613         bindingTable->dwNumBindingTableEntries = CODECHAL_VDENC_HME_END_G12 - CODECHAL_VDENC_HME_BEGIN_G12;
5614         bindingTable->dwBindingTableStartOffset = CODECHAL_VDENC_HME_BEGIN_G12;
5615         break;
5616     default:
5617         CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
5618         return MOS_STATUS_INVALID_PARAMETER;
5619     }
5620 
5621     for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
5622     {
5623         bindingTable->dwBindingTableEntries[i] = i;
5624     }
5625     return eStatus;
5626 }
5627 
EncodeMeKernel(HmeLevel hmeLevel)5628 MOS_STATUS CodechalVdencHevcStateG12::EncodeMeKernel(HmeLevel hmeLevel)
5629 {
5630     CODECHAL_ENCODE_FUNCTION_ENTER;
5631 
5632     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
5633 
5634     PMHW_KERNEL_STATE kernelState = nullptr;
5635     if(hmeLevel == HME_LEVEL_4x)
5636     {
5637         kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB;
5638     }
5639     else
5640     {
5641         kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB;
5642     }
5643     auto encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
5644         (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
5645 
5646     // If Single Task Phase is not enabled, use BT count for the kernel state.
5647     if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
5648     {
5649         uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5650             m_maxBtCount : kernelState->KernelParams.iBTCount;
5651         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5652             m_stateHeapInterface,
5653             maxBtCount));
5654         m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5655         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5656     }
5657 
5658     // Set up the DSH/SSH as normal
5659     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5660         m_stateHeapInterface,
5661         kernelState,
5662         false,
5663         0,
5664         false,
5665         m_storeData));
5666 
5667     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5668     MOS_ZeroMemory(&idParams, sizeof(idParams));
5669     idParams.pKernelState = kernelState;
5670     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5671         m_stateHeapInterface,
5672         1,
5673         &idParams));
5674 
5675     //Setup curbe for StreamIn Kernel
5676     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbe(hmeLevel));
5677 
5678     CODECHAL_DEBUG_TOOL(
5679     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5680         encFunctionType,
5681         MHW_DSH_TYPE,
5682         kernelState));
5683     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5684         encFunctionType,
5685         kernelState));
5686     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5687         encFunctionType,
5688         MHW_ISH_TYPE,
5689         kernelState));
5690     )
5691 
5692     MOS_COMMAND_BUFFER cmdBuffer;
5693     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5694 
5695     SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5696     sendKernelCmdsParams.EncFunctionType = encFunctionType;
5697     sendKernelCmdsParams.pKernelState = kernelState;
5698     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5699 
5700     // Add binding table
5701     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5702         m_stateHeapInterface,
5703         kernelState));
5704 
5705     CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(hmeLevel, &cmdBuffer));
5706 
5707     // Dump SSH for ME kernel
5708     CODECHAL_DEBUG_TOOL(
5709         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5710             encFunctionType,
5711             MHW_SSH_TYPE,
5712             kernelState)));
5713 
5714     uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
5715         (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
5716 
5717     uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
5718     uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
5719 
5720     CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5721     MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5722     walkerCodecParams.WalkerMode = m_walkerMode;
5723     walkerCodecParams.dwResolutionX = resolutionX;
5724     walkerCodecParams.dwResolutionY = resolutionY;
5725     walkerCodecParams.bNoDependency = true;
5726     walkerCodecParams.bMbaff = false;
5727     walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
5728     walkerCodecParams.ucGroupId = m_groupId;
5729 
5730     MHW_WALKER_PARAMS walkerParams;
5731     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
5732         m_hwInterface,
5733         &walkerParams,
5734         &walkerCodecParams));
5735 
5736     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
5737         &cmdBuffer,
5738         &walkerParams));
5739 
5740     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5741 
5742     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5743     {
5744         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5745             m_stateHeapInterface));
5746         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5747     }
5748 
5749     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5750         &cmdBuffer,
5751         encFunctionType,
5752         nullptr)));
5753 
5754     m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
5755 
5756     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5757 
5758     MHW_MI_STORE_DATA_PARAMS    storeDataParams;
5759 
5760     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5761     {
5762         m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5763         m_lastTaskInPhase = false;
5764     }
5765     return eStatus;
5766 }
5767 
SetMeCurbe(HmeLevel hmeLevel)5768 MOS_STATUS CodechalVdencHevcStateG12::SetMeCurbe(HmeLevel hmeLevel)
5769 {
5770     CODECHAL_ENCODE_FUNCTION_ENTER;
5771 
5772     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5773 
5774     CODECHAL_VDENC_HEVC_ME_CURBE_G12 curbe;
5775     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
5776         &curbe,
5777         sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G12),
5778         ME_CURBE_INIT_G12,
5779         sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G12)));
5780 
5781     PMHW_KERNEL_STATE kernelState = nullptr;
5782     if(hmeLevel == HME_LEVEL_4x)
5783     {
5784         kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB;
5785     }
5786     else
5787     {
5788         kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB;
5789     }
5790 
5791     bool useMvFromPrevStep;
5792     bool writeDistortions;
5793     uint32_t scaleFactor;
5794     uint32_t  mvShiftFactor = 0;
5795     uint32_t  prevMvReadPosFactor = 0;
5796 
5797     switch (hmeLevel)
5798     {
5799     case HME_LEVEL_32x:
5800         useMvFromPrevStep = false;
5801         writeDistortions = false;
5802         scaleFactor = SCALE_FACTOR_32x;
5803         mvShiftFactor = 1;
5804         prevMvReadPosFactor = 0;
5805         break;
5806     case HME_LEVEL_16x:
5807         useMvFromPrevStep = (m_b32XMeEnabled) ? true : false;
5808         writeDistortions = false;
5809         scaleFactor = SCALE_FACTOR_16x;
5810         mvShiftFactor = 2;
5811         prevMvReadPosFactor = 1;
5812         break;
5813     case HME_LEVEL_4x:
5814         useMvFromPrevStep = (m_b16XMeEnabled) ? true : false;
5815         writeDistortions = true;
5816         scaleFactor = SCALE_FACTOR_4x;
5817         mvShiftFactor = 2;
5818         prevMvReadPosFactor = 0;
5819         break;
5820     default:
5821         eStatus = MOS_STATUS_INVALID_PARAMETER;
5822         return eStatus;
5823         break;
5824     }
5825 
5826     curbe.DW3.SubPelMode = 3;
5827     curbe.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
5828     curbe.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
5829     curbe.DW5.QpPrimeY            = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
5830     curbe.DW6.WriteDistortions = writeDistortions;
5831     curbe.DW6.UseMvFromPrevStep = useMvFromPrevStep;
5832     curbe.DW6.SuperCombineDist = 5;//SuperCombineDist_Generic[pHevcSeqParams->TargetUsage]; Harded coded in KCM
5833     curbe.DW6.MaxVmvR = 511 * 4;
5834     curbe.DW15.MvShiftFactor = mvShiftFactor;
5835     curbe.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
5836 
5837     if (m_pictureCodingType == B_TYPE)
5838     {
5839         // This field is irrelevant since we are not using the bi-direct search.
5840         curbe.DW1.BiWeight = m_bframeMeBidirectionalWeight;
5841         curbe.DW13.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
5842     }
5843 
5844     if (m_pictureCodingType == P_TYPE || m_pictureCodingType == B_TYPE)
5845     {
5846         curbe.DW13.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
5847     }
5848 
5849     if (hmeLevel == HME_LEVEL_4x)
5850     {
5851         curbe.DW30.ActualMBHeight = m_frameHeight;
5852         curbe.DW30.ActualMBWidth = m_frameWidth;
5853     }
5854     else
5855     {
5856         curbe.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
5857         curbe.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
5858     }
5859 
5860     curbe.DW13.RefStreaminCost = 0;
5861     // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
5862     curbe.DW13.ROIEnable = 0;
5863 
5864     uint8_t meMethod = (m_pictureCodingType == B_TYPE) ? m_bMeMethodGeneric[m_hevcSeqParams->TargetUsage] : m_meMethodGeneric[m_hevcSeqParams->TargetUsage];
5865     uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
5866     CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe.SPDelta), 14 * sizeof(uint32_t),
5867         m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t)));
5868 
5869     if (hmeLevel == HME_LEVEL_4x)
5870     {
5871         //StreamIn CURBE
5872         curbe.DW6.LCUSize            = 1;//Only LCU64 supported by the VDEnc HW
5873         // Kernel should use driver-prepared stream-in surface during ROI/ MBQP(LCUQP)/ Dirty-Rect
5874         curbe.DW6.InputStreamInEn    = (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)));
5875         curbe.DW31.MaxCuSize         = 3;
5876         curbe.DW31.MaxTuSize         = 3;
5877         switch (m_hevcSeqParams->TargetUsage)
5878         {
5879         case 1:
5880         case 4:
5881             curbe.DW36.NumMergeCandCu64x64    = 4;
5882             curbe.DW36.NumMergeCandCu32x32    = 3;
5883             curbe.DW36.NumMergeCandCu16x16    = 2;
5884             curbe.DW36.NumMergeCandCu8x8      = 1;
5885             curbe.DW31.NumImePredictors       = m_imgStateImePredictors;
5886             break;
5887         case 7:
5888             curbe.DW36.NumMergeCandCu64x64    = 2;
5889             curbe.DW36.NumMergeCandCu32x32    = 2;
5890             curbe.DW36.NumMergeCandCu16x16    = 2;
5891             curbe.DW36.NumMergeCandCu8x8      = 0;
5892             curbe.DW31.NumImePredictors       = 4;
5893             break;
5894         }
5895     }
5896 
5897     curbe.DW40._4xMeMvOutputDataSurfIndex       = CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G12;
5898     curbe.DW41._16xOr32xMeMvInputDataSurfIndex = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G12 : CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G12;
5899     curbe.DW42._4xMeOutputDistSurfIndex         = CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G12;
5900     curbe.DW43._4xMeOutputBrcDistSurfIndex      = CODECHAL_VDENC_HME_BRC_DISTORTION_CM_G12;
5901     curbe.DW44.VMEFwdInterPredictionSurfIndex   = CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G12;
5902     curbe.DW45.VMEBwdInterPredictionSurfIndex   = CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G12;
5903     curbe.DW46.VDEncStreamInOutputSurfIndex     = CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G12;
5904     curbe.DW47.VDEncStreamInInputSurfIndex      = CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G12;
5905 
5906     CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
5907         &curbe,
5908         kernelState->dwCurbeOffset,
5909         sizeof(curbe)));
5910 
5911     return eStatus;
5912 }
5913 
SendMeSurfaces(HmeLevel hmeLevel,PMOS_COMMAND_BUFFER cmdBuffer)5914 MOS_STATUS CodechalVdencHevcStateG12::SendMeSurfaces(HmeLevel hmeLevel, PMOS_COMMAND_BUFFER cmdBuffer)
5915 {
5916     CODECHAL_ENCODE_FUNCTION_ENTER;
5917     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5918 
5919     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5920 
5921     MOS_SURFACE *meMvDataBuffer;
5922     uint32_t downscaledWidthInMb;
5923     uint32_t downscaledHeightInMb;
5924 
5925     if (hmeLevel == HME_LEVEL_32x)
5926     {
5927         meMvDataBuffer = &m_s32XMeMvDataBuffer;
5928         downscaledWidthInMb = m_downscaledWidthInMb32x;
5929         downscaledHeightInMb = m_downscaledHeightInMb32x;
5930     }
5931     else if (hmeLevel == HME_LEVEL_16x)
5932     {
5933         meMvDataBuffer = &m_s16XMeMvDataBuffer;
5934         downscaledWidthInMb = m_downscaledWidthInMb16x;
5935         downscaledHeightInMb = m_downscaledHeightInMb16x;
5936     }
5937     else
5938     {
5939         meMvDataBuffer = &m_s4XMeMvDataBuffer;
5940         downscaledWidthInMb = m_downscaledWidthInMb4x;
5941         downscaledHeightInMb = m_downscaledHeightInMb4x;
5942     }
5943 
5944     auto width = MOS_ALIGN_CEIL(downscaledWidthInMb * 32, 64);
5945     auto height = downscaledHeightInMb * 4 * 10;
5946     // Force the values
5947     meMvDataBuffer->dwWidth = width;
5948     meMvDataBuffer->dwHeight = height;
5949     meMvDataBuffer->dwPitch = width;
5950 
5951     PMHW_KERNEL_STATE kernelState = nullptr;
5952     if(hmeLevel == HME_LEVEL_4x)
5953     {
5954         kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB;
5955     }
5956     else
5957     {
5958         kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB;
5959     }
5960     auto bindingTable = (hmeLevel == HME_LEVEL_4x) ?
5961         &m_vdencStreaminKernelBindingTable : &m_vdencMeKernelBindingTable;
5962     uint32_t meMvBottomFieldOffset = 0;
5963 
5964     CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
5965     MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
5966     surfaceCodecParams.bIs2DSurface = true;
5967     surfaceCodecParams.bMediaBlockRW = true;
5968     surfaceCodecParams.psSurface = meMvDataBuffer;
5969     surfaceCodecParams.dwOffset = meMvBottomFieldOffset;
5970     surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
5971     surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G12];
5972     surfaceCodecParams.bIsWritable = true;
5973     surfaceCodecParams.bRenderTarget = true;
5974     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5975         m_hwInterface,
5976         cmdBuffer,
5977         &surfaceCodecParams,
5978         kernelState));
5979 
5980     if (hmeLevel == HME_LEVEL_16x && m_b32XMeEnabled)
5981     {
5982         // Pass 32x MV to 16x ME operation
5983         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
5984         surfaceCodecParams.bIs2DSurface = true;
5985         surfaceCodecParams.bMediaBlockRW = true;
5986         surfaceCodecParams.psSurface = &m_s32XMeMvDataBuffer;
5987         surfaceCodecParams.dwOffset = 0;
5988         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
5989         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G12];
5990         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5991             m_hwInterface,
5992             cmdBuffer,
5993             &surfaceCodecParams,
5994             kernelState));
5995     }
5996     else if (!(hmeLevel == HME_LEVEL_32x) && m_b16XMeEnabled)
5997     {
5998         // Pass 16x MV to 4x ME operation
5999         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6000         surfaceCodecParams.bIs2DSurface = true;
6001         surfaceCodecParams.bMediaBlockRW = true;
6002         surfaceCodecParams.psSurface = &m_s16XMeMvDataBuffer;
6003         surfaceCodecParams.dwOffset = 0;
6004         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
6005         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G12];
6006         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6007             m_hwInterface,
6008             cmdBuffer,
6009             &surfaceCodecParams,
6010             kernelState));
6011 
6012         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6013         surfaceCodecParams.bIs2DSurface = true;
6014         surfaceCodecParams.bMediaBlockRW = true;
6015         surfaceCodecParams.psSurface = &m_s4XMeDistortionBuffer;
6016         surfaceCodecParams.dwOffset = 0;
6017         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G12];
6018         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
6019         surfaceCodecParams.bIsWritable = true;
6020         surfaceCodecParams.bRenderTarget = true;
6021         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6022             m_hwInterface,
6023             cmdBuffer,
6024             &surfaceCodecParams,
6025             kernelState));
6026     }
6027 
6028     PMOS_SURFACE currScaledSurface = (hmeLevel == HME_LEVEL_4x) ? m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) :
6029         ((hmeLevel == HME_LEVEL_16x) ? m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER) : m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER));
6030     MOS_SURFACE refScaledSurface = *currScaledSurface;
6031     bool currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic) ? true : false;
6032     bool currBottomField = CodecHal_PictureIsBottomField(m_currOriginalPic) ? true : false;
6033 
6034     uint8_t currVDirection = (!currFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
6035         ((currBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6036     uint32_t currScaledBottomFieldOffset = (hmeLevel == HME_LEVEL_4x) ?
6037         (uint32_t)m_scaledBottomFieldOffset : ((hmeLevel == HME_LEVEL_16x) ? (uint32_t)m_scaled16xBottomFieldOffset : (uint32_t)m_scaled32xBottomFieldOffset);
6038 
6039     // Setup references 1...n
6040     // LIST 0 references
6041     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
6042     {
6043         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx];
6044 
6045         if (!CodecHal_PictureIsInvalid(refPic))
6046         {
6047             if (refIdx == 0)
6048             {
6049                 // Current Picture Y - VME
6050                 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6051                 surfaceCodecParams.bUseAdvState = true;
6052                 surfaceCodecParams.psSurface = currScaledSurface;
6053                 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
6054                 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
6055                 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G12];
6056                 surfaceCodecParams.ucVDirection = currVDirection;
6057                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6058                     m_hwInterface,
6059                     cmdBuffer,
6060                     &surfaceCodecParams,
6061                     kernelState));
6062             }
6063 
6064             bool refFieldPicture = CodecHal_PictureIsField(refPic) ? true : false;
6065             bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? true : false;
6066             uint8_t refPicIdx       = m_picIdx[refPic.FrameIdx].ucPicIdx;
6067             uint8_t scaledIdx       = m_refList[refPicIdx]->ucScalingIdx;
6068             if (hmeLevel == HME_LEVEL_4x)
6069             {
6070                 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
6071             }
6072             else if (hmeLevel == HME_LEVEL_16x)
6073             {
6074                 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
6075             }
6076             else
6077             {
6078                 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
6079             }
6080             uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
6081 
6082             // L0 Reference Picture Y - VME
6083             MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6084             surfaceCodecParams.bUseAdvState = true;
6085             surfaceCodecParams.psSurface = &refScaledSurface;
6086             surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
6087             surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
6088             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_FWD_REF_IDX0_CM_G12 + (refIdx * 2)];
6089             surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
6090                 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6091             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6092                 m_hwInterface,
6093                 cmdBuffer,
6094                 &surfaceCodecParams,
6095                 kernelState));
6096 
6097             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_RESERVED1_CM_G12 + (refIdx * 2)];
6098             surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
6099                 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6100             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6101                 m_hwInterface,
6102                 cmdBuffer,
6103                 &surfaceCodecParams,
6104                 kernelState));
6105         }
6106     }
6107 
6108     //List1
6109     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
6110     {
6111         CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx];
6112 
6113         if (!CodecHal_PictureIsInvalid(refPic))
6114         {
6115             if (refIdx == 0)
6116             {
6117                 // Current Picture Y - VME
6118                 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6119                 surfaceCodecParams.bUseAdvState = true;
6120                 surfaceCodecParams.psSurface = currScaledSurface;
6121                 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
6122                 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
6123                 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G12];
6124                 surfaceCodecParams.ucVDirection = currVDirection;
6125                 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6126                     m_hwInterface,
6127                     cmdBuffer,
6128                     &surfaceCodecParams,
6129                     kernelState));
6130             }
6131 
6132             bool refFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
6133             bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? 1 : 0;
6134             auto    refPicIdx       = m_picIdx[refPic.FrameIdx].ucPicIdx;
6135             uint8_t scaledIdx       = m_refList[refPicIdx]->ucScalingIdx;
6136 
6137             if (hmeLevel == HME_LEVEL_4x)
6138             {
6139                 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
6140             }
6141             else if (hmeLevel == HME_LEVEL_16x)
6142             {
6143                 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
6144             }
6145             else
6146             {
6147                 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
6148             }
6149             uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
6150 
6151             // L1 Reference Picture Y - VME
6152             MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6153             surfaceCodecParams.bUseAdvState = true;
6154             surfaceCodecParams.psSurface = &refScaledSurface;
6155             surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
6156             surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
6157             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_BWD_REF_IDX0_CM_G12 + (refIdx * 2)];
6158             surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
6159                 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6160             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6161                 m_hwInterface,
6162                 cmdBuffer,
6163                 &surfaceCodecParams,
6164                 kernelState));
6165 
6166             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_RESERVED9_CM_G12 + (refIdx * 2)];
6167             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6168                 m_hwInterface,
6169                 cmdBuffer,
6170                 &surfaceCodecParams,
6171                 kernelState));
6172         }
6173     }
6174 
6175     if (hmeLevel == HME_LEVEL_4x)
6176     {
6177         CODECHAL_ENCODE_CHK_NULL_RETURN(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
6178 
6179         auto streamingSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
6180 
6181         // Send driver-prepared stream-in surface as input during ROI/ MBQP(LCUQP)/ Dirty-Rect
6182         if (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
6183         {
6184             MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6185             surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
6186             surfaceCodecParams.bIs2DSurface = false;
6187             surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
6188             surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
6189             surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G12];
6190             surfaceCodecParams.bIsWritable = true;
6191             surfaceCodecParams.bRenderTarget = true;
6192             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6193                 m_hwInterface,
6194                 cmdBuffer,
6195                 &surfaceCodecParams,
6196                 kernelState));
6197         }
6198         else    // Clear stream-in surface otherwise
6199         {
6200             MOS_LOCK_PARAMS lockFlags;
6201             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6202             lockFlags.WriteOnly = true;
6203 
6204             auto data = m_osInterface->pfnLockResource(
6205                 m_osInterface,
6206                 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
6207                 &lockFlags);
6208 
6209             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6210 
6211             MOS_ZeroMemory(
6212                 data,
6213                 streamingSize);
6214 
6215             m_osInterface->pfnUnlockResource(
6216                 m_osInterface,
6217                 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
6218         }
6219 
6220         MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6221         surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
6222         surfaceCodecParams.bIs2DSurface = false;
6223         surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
6224         surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
6225         surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G12];
6226         surfaceCodecParams.bIsWritable = true;
6227         surfaceCodecParams.bRenderTarget = true;
6228         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6229             m_hwInterface,
6230             cmdBuffer,
6231             &surfaceCodecParams,
6232             kernelState));
6233     }
6234 
6235     return eStatus;
6236 }
6237 
6238 MOS_STATUS
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)6239 CodechalVdencHevcStateG12::GetKernelHeaderAndSize(
6240     void                           *binary,
6241     EncOperation                   operation,
6242     uint32_t                       krnStateIdx,
6243     void                           *krnHeader,
6244     uint32_t                       *krnSize)
6245 {
6246     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6247 
6248     CODECHAL_ENCODE_FUNCTION_ENTER;
6249 
6250     CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
6251     CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
6252     CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
6253 
6254     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(binary, operation, krnStateIdx, krnHeader, krnSize));
6255 
6256     return eStatus;
6257 }
6258 
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)6259 MOS_STATUS CodechalVdencHevcStateG12::AddVdencWalkerStateCmd(
6260     PMOS_COMMAND_BUFFER cmdBuffer,
6261     PMHW_VDBOX_HEVC_SLICE_STATE params)
6262 {
6263     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6264 
6265     CODECHAL_ENCODE_FUNCTION_ENTER;
6266 
6267     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
6268     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
6269 
6270     MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G12 vdencWalkerStateParams;
6271     vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
6272     vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
6273     vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
6274     vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
6275     vdencWalkerStateParams.pTileCodingParams = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->pTileCodingParams;
6276     vdencWalkerStateParams.dwTileId = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->dwTileID;
6277     switch (static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->dwNumPipe)
6278     {
6279     case 0:
6280     case 1:
6281         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
6282         break;
6283     case 2:
6284         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
6285         break;
6286     case 4:
6287         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
6288         break;
6289     default:
6290         vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
6291         CODECHAL_ENCODE_ASSERT(false);
6292         break;
6293     }
6294 
6295     vdencWalkerStateParams.IBCControl =
6296         m_enableLBCOnly ? SCC_IBC_CONTROL_IBC_ONLY_LBC_G12 : SCC_IBC_CONTROL_IBC_ENABLED_TBCLBC_G12;
6297 
6298     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
6299 
6300     return eStatus;
6301 }
6302 
GetSystemPipeNumberCommon()6303 MOS_STATUS CodechalVdencHevcStateG12::GetSystemPipeNumberCommon()
6304 {
6305     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6306 
6307     CODECHAL_ENCODE_FUNCTION_ENTER;
6308 
6309     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
6310     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6311 
6312     MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
6313     statusKey = MOS_UserFeature_ReadValue_ID(
6314         nullptr,
6315         __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
6316         &userFeatureData,
6317         m_osInterface->pOsContext);
6318 
6319     bool disableScalability = m_hwInterface->IsDisableScalability();
6320     if (statusKey == MOS_STATUS_SUCCESS)
6321     {
6322         disableScalability = userFeatureData.i32Data ? true : false;
6323     }
6324 
6325     MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
6326     CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
6327 
6328     if (gtSystemInfo && disableScalability == false)
6329     {
6330         // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
6331         m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
6332     }
6333     else
6334     {
6335         m_numVdbox = 1;
6336     }
6337 
6338     CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d.", m_numVdbox);
6339 
6340     return eStatus;
6341 }
6342 
Initialize(CodechalSetting * settings)6343 MOS_STATUS CodechalVdencHevcStateG12::Initialize(CodechalSetting * settings)
6344 {
6345     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6346 
6347     CODECHAL_ENCODE_FUNCTION_ENTER;
6348 
6349     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
6350     // Tile Replay Enable should be passed from DDI, will change later when DDI is ready
6351     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6352     MOS_UserFeature_ReadValue_ID(
6353         nullptr,
6354         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_TILEREPLAY_ENABLE_ID,
6355         &userFeatureData,
6356         m_osInterface->pOsContext);
6357     m_enableTileReplay = userFeatureData.i32Data ? true : false;
6358 
6359     m_skipFrameBasedHWCounterRead = m_enableTileReplay;
6360 
6361     // RGB Encoding Enable should be passed from DDI, will change later when DDI is ready
6362     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6363     MOS_UserFeature_ReadValue_ID(
6364         nullptr,
6365         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_RGB_ENCODING_ENABLE_ID,
6366         &userFeatureData,
6367         m_osInterface->pOsContext);
6368     m_RGBEncodingEnable = userFeatureData.i32Data ? true : false;
6369 
6370     // Capture mode with display Enable should be passed from DDI, will change later
6371     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6372     MOS_UserFeature_ReadValue_ID(
6373         nullptr,
6374         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_CAPTURE_MODE_ENABLE_ID,
6375         &userFeatureData,
6376         m_osInterface->pOsContext);
6377     m_CaptureModeEnable = userFeatureData.i32Data ? true : false;
6378 
6379 #if (_DEBUG || _RELEASE_INTERNAL)
6380     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6381     MOS_UserFeature_ReadValue_ID(
6382         nullptr,
6383         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_TCBRC_ARB_DISABLE_ID,
6384         &userFeatureData,
6385         m_osInterface->pOsContext);
6386     m_brcAdaptiveRegionBoostSupported = userFeatureData.i32Data ? false : m_brcAdaptiveRegionBoostSupported;
6387 #endif
6388 
    // common initialization
6390     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::Initialize(settings));
6391 
6392     MEDIA_FEATURE_TABLE *skuTable = m_osInterface->pfnGetSkuTable(m_osInterface);
6393     if (m_osInterface->bSimIsActive && (m_enableTileReplay == true))
6394     {
6395         m_frameTrackingEnabled = false;
6396     }
6397 
6398     // To do: current size assumes 8Kx8K max resolution. Needs to be increased based on Gen12, along with m_maxNumNativeROI.
6399     m_deltaQpRoiBufferSize = m_deltaQpBufferSize;
6400     m_brcRoiBufferSize = m_roiStreamInBufferSize;
6401     m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) *
6402         CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
6403 
6404     // we need additional buffer for (1) 1 CL for size info at the beginning of each tile column (max of 4 vdbox in scalability mode)
6405     // (2) CL alignment at end of every tile column
6406     // as a result, increase the height by 1 for allocation purposes
6407     m_numLcu = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * (MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE) + 1);
6408     m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * (m_numLcu * 5 + m_numLcu * 64 * 8), CODECHAL_PAGE_SIZE);
6409     m_mbCodeSize += m_mvOffset;
6410 
6411     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
6412 
6413     if (MOS_VE_SUPPORTED(m_osInterface))
6414     {
6415         m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
6416         CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
6417         //scalability initialize
6418         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
6419     }
6420 
6421     // Caculate the size for 3nd level batch buffer
6422     // mhw_vdbox_hcp_g12_X::HCP_PIC_STATE_CMD::byteSize
6423     // As this buffer is going to passed to HuC to generate the command, must be page aligned
6424     // To add the HW interface get the buffer size later
6425 
6426     m_thirdLBSize = MOS_ALIGN_CEIL(1024, CODECHAL_PAGE_SIZE);
6427 
6428     // Caculate the batch buffer size for each tile
6429     // To add the MHW interface later, can be fine tuned
6430     m_tileLevelBatchSize = m_hwInterface->m_vdenc2ndLevelBatchBufferSize;
6431 
6432     // Caculate the size for MV temporal buffer
6433     uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
6434     uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
6435     m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
6436 
6437     m_sizeOfHcpPakFrameStats = 9 * CODECHAL_CACHELINE_SIZE;
6438 
6439 #ifdef _ENCODE_VDENC_RESERVED
6440     InitReserveState(settings);
6441 #endif
6442     m_enableSCC = settings->isSCCEnabled;
6443 
6444 #if (_DEBUG || _RELEASE_INTERNAL)
6445     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6446     // LBC only Enable should be passed from DDI, will change later when DDI is ready
6447     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6448     MOS_UserFeature_ReadValue_ID(
6449         nullptr,
6450         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_LBCONLY_ENABLE_ID,
6451         &userFeatureData,
6452         m_osInterface->pOsContext);
6453     m_enableLBCOnly = userFeatureData.i32Data ? true : false;
6454 #endif
6455 
6456     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6457     MOS_UserFeature_ReadValue_ID(
6458         nullptr,
6459         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
6460         &userFeatureData,
6461         m_osInterface->pOsContext);
6462     m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
6463 
6464     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6465     MOS_UserFeature_ReadValue_ID(
6466         nullptr,
6467         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
6468         &userFeatureData,
6469         m_osInterface->pOsContext);
6470     m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
6471 
6472     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6473     MOS_UserFeature_ReadValue_ID(
6474         nullptr,
6475         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VDBOX_HW_SEMAPHORE,
6476         &userFeatureData,
6477         m_osInterface->pOsContext);
6478     m_enableVdBoxHWSemaphore = userFeatureData.i32Data ? true : false;
6479 
6480     // ACQP is now supported on Gen12 for TU1 / TU4
6481     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6482     MOS_UserFeature_ReadValue_ID(
6483         nullptr,
6484         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID,
6485         &userFeatureData,
6486         m_osInterface->pOsContext);
6487     m_hevcVdencAcqpEnabled = userFeatureData.i32Data ? true : false;
6488 
6489     m_numDelay = 15;
6490 
6491 #if (_DEBUG || _RELEASE_INTERNAL)
6492     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6493     MOS_UserFeature_ReadValue_ID(
6494         nullptr,
6495         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
6496         &userFeatureData,
6497         m_osInterface->pOsContext);
6498     m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
6499 
6500     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6501     MOS_UserFeature_ReadValue_ID(
6502         nullptr,
6503         __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_FORCE_SCALABILITY_ID,
6504         &userFeatureData,
6505         m_osInterface->pOsContext);
6506     m_forceScalability = userFeatureData.i32Data ? true : false;
6507 #endif
6508 
6509     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6510     MOS_UserFeature_ReadValue_ID(
6511         nullptr,
6512         __MEDIA_USER_FEATURE_VALUE_HEVC_TCBRC_QUALITY_BOOST_ENABLE_ID,
6513         &userFeatureData,
6514         m_osInterface->pOsContext);
6515     m_tcbrcQualityBoost = (userFeatureData.i32Data) ? true : false;
6516 
6517     return eStatus;
6518 }
6519 
CodechalVdencHevcStateG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)6520 CodechalVdencHevcStateG12::CodechalVdencHevcStateG12(
6521     CodechalHwInterface* hwInterface,
6522     CodechalDebugInterface* debugInterface,
6523     PCODECHAL_STANDARD_INFO standardInfo)
6524     :CodechalVdencHevcState(hwInterface, debugInterface, standardInfo)
6525 {
6526     CODECHAL_ENCODE_FUNCTION_ENTER;
6527 
6528     m_useCommonKernel = true;
6529     pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;
6530     m_useHwScoreboard = false;
6531 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
6532     m_kernelBase = (uint8_t*)IGCODECKRN_G12;
6533 #endif
6534     m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
6535     m_scalabilityState = nullptr;
6536     m_brcAdaptiveRegionBoostSupported = true;
6537 
6538     MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
6539     MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
6540     MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
6541     MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
6542     MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
6543     MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
6544     MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
6545 
6546     MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
6547     MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
6548     MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
6549     MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
6550     MOS_ZeroMemory(m_resVdBoxSemaphoreMem, sizeof(m_resVdBoxSemaphoreMem));
6551     MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
6552 
6553     MOS_ZeroMemory(&m_vdencTileRowStoreBuffer, sizeof(m_vdencTileRowStoreBuffer));
6554     MOS_ZeroMemory(&m_thirdLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
6555     MOS_ZeroMemory(&m_vdencSAORowStoreBuffer, sizeof(m_vdencSAORowStoreBuffer));
6556 
6557     CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
6558     for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
6559     {
6560         MOS_ZeroMemory(&m_tileLevelBatchBuffer[i], sizeof(PMHW_BATCH_BUFFER));
6561         MOS_ZeroMemory(&m_TileRowBRCBatchBuffer[i], sizeof(PMHW_BATCH_BUFFER));
6562     }
6563 
6564     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
6565     {
6566         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
6567         {
6568             MOS_ZeroMemory(&m_resHucPakStitchDmemBuffer[k][i], sizeof(m_resHucPakStitchDmemBuffer[k][i]));
6569         }
6570     }
6571 
6572     MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
6573     MOS_ZeroMemory(&m_resTileRowBRCsyncSemaphore, sizeof(m_resTileRowBRCsyncSemaphore));
6574 
6575     m_vdencBrcInitDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12);
6576     m_vdencBrcUpdateDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12);
6577     m_vdencBrcConstDataBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G12);
6578     m_maxNumSlicesSupported        = CODECHAL_VDENC_HEVC_MAX_SLICE_NUM;
6579 
6580     m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
6581     m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
6582 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
6583     m_kernelBase = (uint8_t*)IGCODECKRN_G12;
6584 #endif
6585 
6586     MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
6587         m_kernelBase,
6588         m_kuidCommon,
6589         &m_kernelBinary,
6590         &m_combinedKernelSize);
6591     CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
6592 
6593     m_hwInterface->GetStateHeapSettings()->dwIshSize +=
6594         MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
6595 
6596     m_hwInterface->m_hucCommandBufferSize += 64;
6597 
6598     m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
6599     Mos_SetVirtualEngineSupported(m_osInterface, true);
6600 
6601     CODECHAL_DEBUG_TOOL(
6602         CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG12, this));
6603     )
6604 }
6605 
SetGpuCtxCreatOption()6606 MOS_STATUS CodechalVdencHevcStateG12::SetGpuCtxCreatOption()
6607 {
6608     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6609 
6610     CODECHAL_ENCODE_FUNCTION_ENTER;
6611 
6612     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
6613     {
6614         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
6615     }
6616     else
6617     {
6618         m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
6619         CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
6620 
6621         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
6622             m_scalabilityState,
6623             (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
6624     }
6625 
6626     return eStatus;
6627 }
6628 
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6629 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCPakIntegrate(
6630     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS  virtualAddrParams)
6631 {
6632     CODECHAL_ENCODE_FUNCTION_ENTER;
6633 
6634     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
6635 
6636     int32_t currentPass = GetCurrentPass();
6637 
6638     if(m_enableTileStitchByHW)
6639     {
6640         CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6641     }
6642 
6643     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6644     CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6645 
6646     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6647 
6648     // Add Virtual addr
6649     virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
6650     virtualAddrParams->regionParams[0].dwOffset   = 0;
6651     virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
6652     virtualAddrParams->regionParams[1].isWritable = true;
6653     virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer;                         // Region 4 - Last Tile bitstream
6654     virtualAddrParams->regionParams[4].dwOffset   = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6655     virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer;                         // Region 5 - HuC modifies the last tile bitstream before stitch command
6656     virtualAddrParams->regionParams[5].dwOffset   = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6657     virtualAddrParams->regionParams[5].isWritable = true;
6658     virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer;                 // Region 6 History Buffer (Input/Output)
6659     virtualAddrParams->regionParams[6].isWritable = true;
6660     virtualAddrParams->regionParams[7].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource;                // Region 7 - HCP PIC state command
6661     virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer;                           // Region 9 HuC outputs BRC data
6662     virtualAddrParams->regionParams[9].isWritable = true;
6663     if (m_enableTileStitchByHW)
6664     {
6665         virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass];  // Region 8 - data buffer read by HUC for stitching cmd generation
6666         virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;  // Region 10 - SLB for stitching cmd output from Huc
6667         virtualAddrParams->regionParams[10].isWritable = true;
6668     }
6669     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;          // Region 15 [In/Out] - Tile Record Buffer
6670     virtualAddrParams->regionParams[15].dwOffset   = 0;
6671 
6672     return eStatus;
6673 }
6674 
ConfigStitchDataBuffer()6675 MOS_STATUS CodechalVdencHevcStateG12::ConfigStitchDataBuffer()
6676 {
6677     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6678     CODECHAL_ENCODE_FUNCTION_ENTER;
6679     int32_t currentPass = GetCurrentPass();
6680     if (currentPass < 0 ||
6681         (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled))
6682     {
6683         eStatus = MOS_STATUS_INVALID_PARAMETER;
6684         return eStatus;
6685     }
6686 
6687     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6688     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6689     lockFlagsWriteOnly.WriteOnly = 1;
6690 
6691     HucCommandDataVdencG12 *hucStitchDataBuf = (HucCommandDataVdencG12 *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6692 
6693     MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandDataVdencG12));
6694     hucStitchDataBuf->TotalCommands          = 1;
6695     hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
6696 
6697     HucInputCmdVdencG12 hucInputCmd;
6698     MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdVdencG12));
6699 
6700     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
6701     hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
6702     hucInputCmd.CmdMode             = HUC_CMD_LIST_MODE;
6703     hucInputCmd.LengthOfTable       = (uint8_t)(m_numTiles);
6704     hucInputCmd.CopySize            = m_hwInterface->m_tileRecordSize;
6705 
6706     PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;
6707 
6708     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6709         m_osInterface,
6710         presSrc,
6711         false,
6712         false));
6713     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6714         m_osInterface,
6715         &m_resBitstreamBuffer,
6716         true,
6717         true));
6718 
6719     uint64_t srcAddr          = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
6720     uint64_t destAddr         = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
6721     hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
6722     hucInputCmd.SrcAddrTop    = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
6723 
6724     hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
6725     hucInputCmd.DestAddrTop    = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
6726 
6727     MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdVdencG12), &hucInputCmd, sizeof(HucInputCmdVdencG12));
6728 
6729     m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
6730 
6731     return eStatus;
6732 }
6733 
SetRegionsHuCPakIntegrateStitch(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6734 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCPakIntegrateStitch(
6735     PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
6736 {
6737     CODECHAL_ENCODE_FUNCTION_ENTER;
6738 
6739     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6740 
6741     int32_t currentPass = GetCurrentPass();
6742 
6743     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6744 
6745     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6746     CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6747 
6748     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6749 
6750     // Add Virtual addr
6751     virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;  // Region 0 - Tile based input statistics from PAK/ VDEnc
6752     virtualAddrParams->regionParams[0].dwOffset   = 0;
6753     virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource;  // Region 1 - HuC Frame statistics output
6754     virtualAddrParams->regionParams[1].isWritable = true;
6755     virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer;                         // Region 4 - Last Tile bitstream
6756     virtualAddrParams->regionParams[4].dwOffset   = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6757     virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer;                         // Region 5 - HuC modifies the last tile bitstream before stitch command
6758     virtualAddrParams->regionParams[5].dwOffset   = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6759     virtualAddrParams->regionParams[5].isWritable = true;
6760     virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer;  // Region 6  History Buffer (Input/Output)
6761     virtualAddrParams->regionParams[6].isWritable = true;
6762     virtualAddrParams->regionParams[7].presRegion = &m_thirdLevelBatchBuffer.OsResource;  //&m_resHucPakStitchReadBatchBuffer;             // Region 7 - HCP PIC state command
6763     virtualAddrParams->regionParams[8].presRegion  = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass];  // Region 8 - data buffer read by HUC for stitching cmd generation
6764     virtualAddrParams->regionParams[9].presRegion  = &m_resBrcDataBuffer;  // Region 9  HuC outputs BRC data
6765     virtualAddrParams->regionParams[9].isWritable  = true;
6766     virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource;                         // Region 10 - SLB for stitching cmd output from Huc
6767     virtualAddrParams->regionParams[10].isWritable = true;
6768     virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;  // Region 15 [In/Out] - Tile Record Buffer
6769     virtualAddrParams->regionParams[15].dwOffset   = 0;
6770     return eStatus;
6771 }
6772 
SetDmemHuCPakIntegrateStitch(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6773 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCPakIntegrateStitch(
6774     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
6775 {
6776     CODECHAL_ENCODE_FUNCTION_ENTER;
6777 
6778     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6779 
6780     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6781     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6782     lockFlagsWriteOnly.WriteOnly = true;
6783 
6784     int32_t currentPass = GetCurrentPass();
6785 
6786     HucPakStitchDmemVdencG12 *hucPakStitchDmem = (HucPakStitchDmemVdencG12 *)m_osInterface->pfnLockResource(
6787         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6788     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6789 
6790     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG12));
6791 
6792     // reset all the offsets to -1
6793     uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
6794                                sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
6795                                sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
6796                                sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
6797                                sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
6798                                sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
6799     MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
6800 
6801     uint16_t numTileRows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
6802     uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6803     CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0);                       //numTileColumns is nonzero and even number; 2 or 4
6804     CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe);  //ucNumPipe is nonzero and even number; 2 or 4
6805     uint16_t numTiles        = numTileRows * numTileColumns;
6806     uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
6807     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6808     CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6809 
6810     hucPakStitchDmem->PicWidthInPixel          = (uint16_t)m_frameWidth;
6811     hucPakStitchDmem->PicHeightInPixel         = (uint16_t)m_frameHeight;
6812     hucPakStitchDmem->TotalNumberOfPAKs        = 0;
6813     hucPakStitchDmem->Codec                    = 2;  //HEVC DP CQP
6814     hucPakStitchDmem->MAXPass                  = 1;
6815     hucPakStitchDmem->CurrentPass              = 1;
6816     hucPakStitchDmem->MinCUSize                = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6817     hucPakStitchDmem->CabacZeroWordFlag        = false;
6818     hucPakStitchDmem->bitdepth_luma            = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
6819     hucPakStitchDmem->bitdepth_chroma          = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
6820     hucPakStitchDmem->ChromaFormatIdc          = m_hevcSeqParams->chroma_format_idc;
6821     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6822     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6823     hucPakStitchDmem->OffsetInCommandBuffer   = tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
6824     hucPakStitchDmem->LastTileBS_StartInBytes = (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
6825 
6826     hucPakStitchDmem->StitchEnable        = true;
6827     hucPakStitchDmem->StitchCommandOffset = 0;
6828     hucPakStitchDmem->BBEndforStitch      = HUC_BATCH_BUFFER_END;
6829 
6830     //Set the kernel output offsets
6831     hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6832     hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = 0xFFFFFFFF;
6833     hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
6834     hucPakStitchDmem->VDENCSTAT_offset[0]      = 0xFFFFFFFF;
6835 
6836     for (auto i = 0; i < m_numPipe; i++)
6837     {
6838         hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6839 
6840         // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6841         // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6842         hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
6843                                                          m_hevcTileStatsOffset.uiTileSizeRecord;
6844     }
6845 
6846     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6847 
6848     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6849     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6850     dmemParams->dwDataLength      = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE);
6851     dmemParams->dwDmemOffset      = HUC_DMEM_OFFSET_RTOS_GEMS;
6852 
6853     return eStatus;
6854 }
6855 
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6856 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCPakIntegrate(
6857     PMHW_VDBOX_HUC_DMEM_STATE_PARAMS    dmemParams)
6858 {
6859     CODECHAL_ENCODE_FUNCTION_ENTER;
6860 
6861     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6862 
6863     MOS_LOCK_PARAMS lockFlagsWriteOnly;
6864     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6865     lockFlagsWriteOnly.WriteOnly = true;
6866 
6867     int32_t currentPass = GetCurrentPass();
6868     if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES)
6869     {
6870         eStatus = MOS_STATUS_INVALID_PARAMETER;
6871         return eStatus;
6872     }
6873 
6874     HucPakStitchDmemVdencG12* hucPakStitchDmem = (HucPakStitchDmemVdencG12*)m_osInterface->pfnLockResource(
6875         m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6876     CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6877     MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG12));
6878 
6879     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6880     CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6881 
6882     // Reset all the offsets to be shared in the huc dmem (6*5 DW's)
6883     MOS_FillMemory(hucPakStitchDmem, 120, 0xFF);
6884 
6885     uint16_t numTileRows                        = m_hevcPicParams->num_tile_rows_minus1 + 1;
6886     uint16_t numTileColumns                     = m_hevcPicParams->num_tile_columns_minus1 + 1;
6887     uint16_t numTiles                           = numTileRows * numTileColumns;
6888     uint16_t numTilesPerPipe                    = m_numTiles / m_numPipe;
6889 
6890     hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6891     // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6892     hucPakStitchDmem->OffsetInCommandBuffer    = (m_numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8;
6893     hucPakStitchDmem->PicWidthInPixel          = (uint16_t)m_frameWidth;
6894     hucPakStitchDmem->PicHeightInPixel         = (uint16_t)m_frameHeight;
6895     hucPakStitchDmem->TotalNumberOfPAKs        = m_numPipe;
6896     hucPakStitchDmem->Codec                    = 2;             // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
6897     hucPakStitchDmem->MAXPass                  = m_brcEnabled ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1;
6898     hucPakStitchDmem->CurrentPass              = (uint8_t) currentPass + 1;      // Current BRC pass [1..MAXPass]
6899     hucPakStitchDmem->MinCUSize                = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6900     hucPakStitchDmem->CabacZeroWordFlag        = false;
6901     hucPakStitchDmem->bitdepth_luma            = m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
6902     hucPakStitchDmem->bitdepth_chroma          = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
6903     hucPakStitchDmem->ChromaFormatIdc          = m_hevcSeqParams->chroma_format_idc;
6904     hucPakStitchDmem->LastTileBS_StartInBytes  = (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
6905     hucPakStitchDmem->PIC_STATE_StartInBytes   = (uint16_t)m_picStateCmdStartInBytes;
6906     CODECHAL_ENCODE_VERBOSEMESSAGE("last tile offset = 0x%x, LastTileBS_StartInBytes =0x%x, (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE), hucPakStitchDmem->LastTileBS_StartInBytes");
6907     if(m_enableTileStitchByHW)
6908     {
6909         hucPakStitchDmem->StitchEnable = true;
6910         hucPakStitchDmem->StitchCommandOffset = 0;
6911         hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6912     }
6913 
6914     if (m_numPipe > 1)
6915     {
6916         //Set the kernel output offsets
6917         hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = m_hevcFrameStatsOffset.uiHevcPakStatistics;
6918         hucPakStitchDmem->HEVC_Streamout_offset[0] = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
6919         hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6920         hucPakStitchDmem->VDENCSTAT_offset[0]      = m_hevcFrameStatsOffset.uiVdencStatistics;
6921 
6922         // Calculate number of slices that execute on a single pipe
6923         for (auto tileRow = 0; tileRow < numTileRows; tileRow++)
6924         {
6925             for (auto tileCol = 0; tileCol < numTileColumns; tileCol++)
6926             {
6927                 PCODEC_ENCODER_SLCDATA  slcData = m_slcData;
6928                 uint16_t  slcCount, idx, sliceNumInTile = 0;
6929 
6930                 idx = tileRow * numTileColumns + tileCol;
6931                 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
6932                 {
6933                     bool    lastSliceInTile = false, sliceInTile = false;
6934 
6935                     CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
6936                         &tileParams[idx],
6937                         &sliceInTile,
6938                         &lastSliceInTile));
6939 
6940                     if (!sliceInTile)
6941                     {
6942                         continue;
6943                     }
6944 
6945                     sliceNumInTile++;
6946                 } // end of slice
6947                 if (0 == sliceNumInTile)
6948                 {
6949                     // One tile must have at least one slice
6950                     CODECHAL_ENCODE_ASSERT(false);
6951                     eStatus = MOS_STATUS_INVALID_PARAMETER;
6952                     break;
6953                 }
6954 
6955                 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
6956                 {
6957                     CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
6958                     return MOS_STATUS_INVALID_PARAMETER;
6959                 }
6960                 // Set the number of slices per pipe in the Dmem structure
6961                 hucPakStitchDmem->NumSlices[tileCol] += sliceNumInTile;
6962             }
6963         }
6964 
6965         for (auto i = 0; i < m_numPipe; i++)
6966         {
6967             hucPakStitchDmem->NumTiles[i]   = numTilesPerPipe;
6968             hucPakStitchDmem->NumSlices[i]  = numTilesPerPipe;      // Assuming 1 slice/ tile. To do: change this later.
6969 
6970             // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6971             // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6972             hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + m_hevcTileStatsOffset.uiTileSizeRecord;
6973             hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1]   = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) + m_hevcTileStatsOffset.uiHevcPakStatistics;
6974             hucPakStitchDmem->VDENCSTAT_offset[i + 1]      = (i * numTilesPerPipe * m_hevcStatsSize.uiVdencStatistics) + m_hevcTileStatsOffset.uiVdencStatistics;
6975             hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + m_hevcTileStatsOffset.uiHevcSliceStreamout;
6976         }
6977     }
6978     else
6979     {
6980         hucPakStitchDmem->NumTiles[0]               = numTiles;
6981         hucPakStitchDmem->TotalNumberOfPAKs         = m_numPipe;
6982 
6983         // non-scalable mode, only VDEnc statistics need to be aggregated
6984         hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics;
6985         hucPakStitchDmem->VDENCSTAT_offset[1] = m_hevcTileStatsOffset.uiVdencStatistics;
6986     }
6987 
6988     m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6989 
6990     MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6991     dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6992     dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE);
6993     dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6994 
6995     return eStatus;
6996 }
6997 
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)6998 MOS_STATUS CodechalVdencHevcStateG12::HucPakIntegrate(
6999     PMOS_COMMAND_BUFFER cmdBuffer)
7000 {
7001     CODECHAL_ENCODE_FUNCTION_ENTER;
7002 
7003     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7004 
7005     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7006 
7007     CODECHAL_ENCODE_CHK_COND_RETURN(
7008         (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
7009         "ERROR - vdbox index exceed the maximum");
7010 
7011     auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
7012 
7013     // load kernel from WOPCM into L2 storage RAM
7014     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
7015     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
7016     imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
7017 
7018     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
7019 
7020     // pipe mode select
7021     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
7022     pipeModeSelectParams.Mode = m_mode;
7023     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
7024 
7025     // DMEM set
7026     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
7027     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
7028     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
7029 
7030     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
7031     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
7032     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
7033 
7034     // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
7035     MHW_MI_STORE_DATA_PARAMS storeDataParams;
7036     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7037     storeDataParams.pOsResource = &m_resHucStatus2Buffer;
7038     storeDataParams.dwResourceOffset = 0;
7039     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
7040     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
7041 
7042     // Store HUC_STATUS2 register
7043     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
7044     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7045     storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
7046     storeRegParams.dwOffset = sizeof(uint32_t);
7047     storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
7048     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
7049 
7050     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
7051 
7052     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
7053 
7054     // wait Huc completion (use HEVC bit for now)
7055     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
7056     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
7057     vdPipeFlushParams.Flags.bFlushHEVC = 1;
7058     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
7059     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
7060 
7061     // Flush the engine to ensure memory written out
7062     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
7063     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
7064     flushDwParams.bVideoPipelineCacheInvalidate = true;
7065     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
7066 
7067     EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
7068 
7069     uint32_t baseOffset =
7070         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
7071 
7072                                                                                              // Write HUC_STATUS mask
7073     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7074     storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
7075     storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
7076     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
7077     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
7078         cmdBuffer,
7079         &storeDataParams));
7080 
7081     // store HUC_STATUS register
7082     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7083     storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
7084     storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
7085     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
7086     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
7087         cmdBuffer,
7088         &storeRegParams));
7089 
7090     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
7091     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
7092 
7093     return eStatus;
7094 }
7095 
HucPakIntegrateStitch(PMOS_COMMAND_BUFFER cmdBuffer)7096 MOS_STATUS CodechalVdencHevcStateG12::HucPakIntegrateStitch(
7097     PMOS_COMMAND_BUFFER cmdBuffer)
7098 {
7099     CODECHAL_ENCODE_FUNCTION_ENTER;
7100 
7101     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7102 
7103     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7104 
7105     CODECHAL_ENCODE_CHK_COND_RETURN(
7106         (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
7107         "ERROR - vdbox index exceed the maximum");
7108 
7109     auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
7110 
7111     // load kernel from WOPCM into L2 storage RAM
7112     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
7113     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
7114     imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
7115 
7116     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
7117 
7118     // pipe mode select
7119     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
7120     pipeModeSelectParams.Mode = m_mode;
7121     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
7122 
7123     // DMEM set
7124     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
7125     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateStitch(&dmemParams));
7126     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
7127 
7128     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
7129     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateStitch(&virtualAddrParams));
7130     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
7131 
7132     // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
7133     MHW_MI_STORE_DATA_PARAMS storeDataParams;
7134     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7135     storeDataParams.pOsResource = &m_resHucStatus2Buffer;
7136     storeDataParams.dwResourceOffset = 0;
7137     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
7138     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
7139 
7140     // Store HUC_STATUS2 register
7141     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
7142     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7143     storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
7144     storeRegParams.dwOffset = sizeof(uint32_t);
7145     storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
7146     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
7147 
7148     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
7149 
7150     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
7151 
7152     // wait Huc completion (use HEVC bit for now)
7153     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
7154     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
7155     vdPipeFlushParams.Flags.bFlushHEVC = 1;
7156     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
7157     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
7158 
7159     // Flush the engine to ensure memory written out
7160     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
7161     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
7162     flushDwParams.bVideoPipelineCacheInvalidate = true;
7163     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
7164 
7165     EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
7166 
7167     uint32_t baseOffset =
7168         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
7169 
7170     // Write HUC_STATUS mask
7171     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7172     storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
7173     storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
7174     storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
7175     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
7176         cmdBuffer,
7177         &storeDataParams));
7178 
7179     // store HUC_STATUS register
7180     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7181     storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
7182     storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
7183     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
7184     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
7185         cmdBuffer,
7186         &storeRegParams));
7187 
7188     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
7189     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
7190 
7191     return eStatus;
7192 }
7193 
CreateMhwParams()7194 void CodechalVdencHevcStateG12::CreateMhwParams()
7195 {
7196     m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G12);
7197     m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12);
7198     m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
7199 }
7200 
CalculatePictureStateCommandSize()7201 MOS_STATUS CodechalVdencHevcStateG12::CalculatePictureStateCommandSize()
7202 {
7203     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7204 
7205     CODECHAL_ENCODE_FUNCTION_ENTER;
7206 
7207     MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
7208     CODECHAL_ENCODE_CHK_STATUS_RETURN(
7209         m_hwInterface->GetHxxStateCommandSize(
7210             CODECHAL_ENCODE_MODE_HEVC,
7211             &m_defaultPictureStatesSize,
7212             &m_defaultPicturePatchListSize,
7213             &stateCmdSizeParams));
7214 
7215     return eStatus;
7216 }
7217 
AddHcpPipeBufAddrCmd(PMOS_COMMAND_BUFFER cmdBuffer)7218 MOS_STATUS CodechalVdencHevcStateG12::AddHcpPipeBufAddrCmd(
7219     PMOS_COMMAND_BUFFER  cmdBuffer)
7220 {
7221     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7222 
7223     CODECHAL_ENCODE_FUNCTION_ENTER;
7224 
7225 #ifdef _MMC_SUPPORTED
7226     m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
7227     // Recon P010v MMC state set from RC for compression write
7228     // Reference P010v MMC state set from MC for compression read
7229     if (m_reconSurface.Format == Format_P010 && m_pipeBufAddrParams && MmcEnable(m_pipeBufAddrParams->PreDeblockSurfMmcState))
7230     {
7231         auto paramsG12 = dynamic_cast<PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12>(m_pipeBufAddrParams);
7232         MHW_CHK_NULL_RETURN(paramsG12);
7233         paramsG12->bSpecificReferencedMmcRequired = true;
7234         paramsG12->ReferencesMmcState             = m_pipeBufAddrParams->PreDeblockSurfMmcState;
7235 
7236         m_pipeBufAddrParams->PreDeblockSurfMmcState = MOS_MEMCOMP_RC;
7237     }
7238 #endif
7239     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams));
7240 
7241     return eStatus;
7242 }
7243 
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 * tileCodingParams)7244 MOS_STATUS CodechalVdencHevcStateG12::SetTileData(
7245     MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12*   tileCodingParams)
7246 {
7247     CODECHAL_ENCODE_FUNCTION_ENTER;
7248 
7249     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
7250 
7251     if (!m_hevcPicParams->tiles_enabled_flag)
7252     {
7253         return eStatus;
7254     }
7255 
7256     uint32_t colBd[100] = { 0 };
7257     uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7258     for (uint32_t i = 0; i < numTileColumns; i++)
7259     {
7260         colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
7261     }
7262 
7263     uint32_t rowBd[100] = { 0 };
7264     uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7265     for (uint32_t i = 0; i < numTileRows; i++)
7266     {
7267         rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
7268     }
7269 
7270     m_numTiles = numTileRows * numTileColumns;
7271     if (m_numTiles > CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_VDENC_MIN_TILE_WIDTH_SIZE) *
7272         CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_VDENC_MIN_TILE_HEIGHT_SIZE))
7273     {
7274         return MOS_STATUS_INVALID_PARAMETER;
7275     }
7276     m_numTileRows = numTileRows;
7277 
7278     uint32_t const numCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
7279     uint32_t       numCuRecord = numCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
7280     uint32_t       maxBytePerLCU = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
7281     maxBytePerLCU = maxBytePerLCU * maxBytePerLCU; // number of pixels per LCU
7282     maxBytePerLCU = maxBytePerLCU * 3 / (m_is10BitHevc ? 1 : 2);  //assume 4:2:0 format
7283     uint32_t    bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
7284     int32_t     frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
7285     int32_t     frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
7286     int32_t     shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7287     uint32_t    ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
7288     uint32_t    streamInWidthinLCU = MOS_ROUNDUP_DIVIDE((frameWidthInMinCb << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
7289     uint32_t    numLcuInPic = 0;
7290     uint32_t    tileStartLCUAddr = 0;
7291 
7292     for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
7293     {
7294         for (uint32_t j = 0; j < numTileColumns; j++)
7295         {
7296             numLcuInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7297         }
7298     }
7299 
7300     uint32_t    numSliceInTile = 0;
7301     uint64_t    activeBitstreamSize = (uint64_t)m_encodeParams.dwBitstreamSize;
7302     // There would be padding at the end of last tile in CBR, reserve dedicated part in the BS buf
7303     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
7304     {
7305         // Assume max padding num < target frame size derived from target bit rate and frame rate
7306         uint32_t actualFrameRate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
7307         uint64_t reservedPart    = (uint64_t)m_hevcSeqParams->TargetBitRate / 8 / (uint64_t)actualFrameRate * 1024;
7308 
7309         if (reservedPart > activeBitstreamSize)
7310         {
7311             CODECHAL_ENCODE_ASSERTMESSAGE("Frame size cal from target Bit rate is larger than BS buf! Issues in CBR paras!");
7312             return MOS_STATUS_INVALID_PARAMETER;
7313         }
7314 
7315         // Capping the reserved part to 1/10 of bs buf size
7316         if (reservedPart > activeBitstreamSize / 10)
7317         {
7318             reservedPart = activeBitstreamSize / 10;
7319         }
7320 
7321         activeBitstreamSize -= reservedPart;
7322     }
7323 
7324     for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
7325     {
7326         for (uint32_t j = 0; j < numTileColumns; j++)
7327         {
7328             uint32_t idx = i * numTileColumns + j;
7329             uint32_t numLcuInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7330 
7331             tileCodingParams[idx].TileStartLCUX = colBd[j];
7332             tileCodingParams[idx].TileStartLCUY = rowBd[i];
7333 
7334             tileCodingParams[idx].TileColumnStoreSelect = j % 2;
7335             tileCodingParams[idx].TileRowStoreSelect = i % 2;
7336 
7337             if (j != numTileColumns - 1)
7338             {
7339                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
7340                 tileCodingParams[idx].IsLastTileofRow = false;
7341             }
7342             else
7343             {
7344                 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
7345                 tileCodingParams[idx].IsLastTileofRow = true;
7346 
7347             }
7348 
7349             if (i != numTileRows - 1)
7350             {
7351                 tileCodingParams[idx].IsLastTileofColumn = false;
7352                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
7353             }
7354             else
7355             {
7356                 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
7357                 tileCodingParams[idx].IsLastTileofColumn = true;
7358             }
7359 
7360             tileCodingParams[idx].NumOfTilesInFrame = m_numTiles;
7361             tileCodingParams[idx].NumOfTileColumnsInFrame = numTileColumns;
7362             tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
7363                 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7364             tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1;
7365 
7366             tileCodingParams[idx].PakTileStatisticsOffset = 9 * idx;
7367             tileCodingParams[idx].TileSizeStreamoutOffset = idx;
7368             tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
7369             tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource;
7370             tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
7371             tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
7372             tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
7373             tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
7374             tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
7375 
7376             uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
7377             uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
7378 
7379             //StreamIn data is 4 Cachelines per LCU
7380             tileCodingParams[idx].TileStreaminOffset = 4 * (tileCodingParams[idx].TileStartLCUY * streamInWidthinLCU + tileCodingParams[idx].TileStartLCUX * tileHeightInLCU);
7381             tileCodingParams[idx].SliceSizeStreamoutOffset = tileStartLCUAddr;
7382             tileStartLCUAddr += (tileWidthInLCU * tileHeightInLCU);
7383 
7384             cuLevelStreamoutOffset += (tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16 / CODECHAL_CACHELINE_SIZE;
7385             sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
7386             saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
7387 
7388             uint64_t totalSizeTemp = (uint64_t)activeBitstreamSize * (uint64_t)numLcuInTile;
7389             uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)numLcuInPic) + ((totalSizeTemp % (uint64_t)numLcuInPic) ? 1 : 0);
7390             bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7391 
7392             numLcusInTiles += numLcuInTile;
7393 
7394             for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
7395             {
7396                 bool lastSliceInTile = false, sliceInTile = false;
7397                 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
7398                     &tileCodingParams[idx],
7399                     &sliceInTile,
7400                     &lastSliceInTile));
7401                 numSliceInTile += (sliceInTile ? 1 : 0);
7402             }
7403         }
7404 
7405         // same row store buffer for different tile rows.
7406         saoRowstoreOffset = 0;
7407         sseRowstoreOffset = 0;
7408     }
7409 
7410     return eStatus;
7411 }
7412 
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,bool * sliceInTile,bool * lastSliceInTile)7413 MOS_STATUS CodechalVdencHevcStateG12::IsSliceInTile(
7414     uint32_t                                sliceNumber,
7415     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12   currentTile,
7416     bool                                   *sliceInTile,
7417     bool                                   *lastSliceInTile)
7418 {
7419     CODECHAL_ENCODE_FUNCTION_ENTER;
7420 
7421     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
7422 
7423     CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
7424     CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
7425     CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
7426 
7427     uint32_t shift            = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7428     uint32_t residual = (1 << shift) - 1;
7429     uint32_t frameWidthInLCU  = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
7430     uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
7431 
7432     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
7433     uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
7434     uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
7435     uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
7436 
7437     uint32_t tileColumnWidth = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
7438     uint32_t tileRowHeight = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
7439     if (sliceLCUx <  currentTile->TileStartLCUX ||
7440         sliceLCUy <  currentTile->TileStartLCUY ||
7441         sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
7442         sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
7443         )
7444     {
7445         // slice start is not in the tile boundary
7446         *lastSliceInTile = *sliceInTile = false;
7447         return eStatus;
7448     }
7449 
7450     sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tileColumnWidth;
7451     sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tileColumnWidth;
7452 
7453     if (sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth)
7454     {
7455         sliceLCUx -= tileColumnWidth;
7456         sliceLCUy++;
7457     }
7458 
7459     if (sliceLCUx <  currentTile->TileStartLCUX ||
7460         sliceLCUy <  currentTile->TileStartLCUY ||
7461         sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
7462         sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
7463         )
7464     {
7465         // last LCU of the slice is out of the tile boundary
7466         *lastSliceInTile = *sliceInTile = false;
7467         return eStatus;
7468     }
7469 
7470     *sliceInTile = true;
7471 
7472     sliceLCUx++;
7473     sliceLCUy++;
7474 
7475     // the end of slice is at the boundary of tile
7476     *lastSliceInTile = (
7477         sliceLCUx == currentTile->TileStartLCUX + tileColumnWidth &&
7478         sliceLCUy == currentTile->TileStartLCUY + tileRowHeight);
7479 
7480     return eStatus;
7481 }
7482 
InitMmcState()7483 MOS_STATUS CodechalVdencHevcStateG12::InitMmcState()
7484 {
7485     CODECHAL_ENCODE_FUNCTION_ENTER;
7486 #ifdef _MMC_SUPPORTED
7487     m_mmcState = MOS_New(CodechalMmcEncodeHevcG12, m_hwInterface, this);
7488     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
7489 #endif
7490     return MOS_STATUS_SUCCESS;
7491 }
7492 
7493 #ifdef _ENCODE_VDENC_RESERVED
InitReserveState(CodechalSetting * settings)7494 MOS_STATUS CodechalVdencHevcStateG12::InitReserveState(CodechalSetting * settings)
7495 {
7496     CODECHAL_ENCODE_FUNCTION_ENTER;
7497 
7498     m_rsvdState = MOS_New(CodechalVdencHevcG12Rsvd, m_hwInterface, this);
7499     return MOS_STATUS_SUCCESS;
7500 }
7501 #endif
7502 
CalculateCommandBufferSize()7503 uint32_t CodechalVdencHevcStateG12::CalculateCommandBufferSize()
7504 {
7505     CODECHAL_ENCODE_FUNCTION_ENTER;
7506 
7507     // To be refined later, differentiate BRC and CQP
7508     uint32_t commandBufferSize =
7509         m_pictureStatesSize        +
7510         m_extraPictureStatesSize   +
7511         (m_sliceStatesSize * m_numSlices) +
7512         m_hucCommandsSize * 5;
7513 
7514     if (m_singleTaskPhaseSupported)
7515     {
7516         commandBufferSize *= (m_numPasses + 1);
7517     }
7518 
7519     if (m_osInterface->bUsesPatchList && m_hevcPicParams->tiles_enabled_flag)
7520     {
7521         commandBufferSize += (m_tileLevelBatchSize * m_numTiles * CODECHAL_VDENC_BRC_NUM_OF_PASSES);
7522     }
7523 
7524     // 4K align since allocation is in chunks of 4K bytes.
7525     commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, 0x1000);
7526 
7527     return commandBufferSize;
7528 }
7529 
VerifyCommandBufferSize()7530 MOS_STATUS CodechalVdencHevcStateG12::VerifyCommandBufferSize()
7531 {
7532     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7533 
7534     CODECHAL_ENCODE_FUNCTION_ENTER;
7535 
7536     if (UseRenderCommandBuffer() || m_numPipe == 1)
7537     {
7538         // legacy mode & resize CommandBuffer Size for every BRC pass
7539         if (!m_singleTaskPhaseSupported)
7540         {
7541             CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
7542         }
7543         return eStatus;
7544     }
7545 
7546     // virtual engine
7547     uint32_t requestedSize =
7548         m_pictureStatesSize +
7549         m_extraPictureStatesSize +
7550         (m_sliceStatesSize * m_numSlices);
7551 
7552     requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
7553 
7554     // Running in the multiple VDBOX mode
7555     int currentPipe = GetCurrentPipe();
7556     if (currentPipe < 0 || currentPipe >= m_numPipe)
7557     {
7558         eStatus = MOS_STATUS_INVALID_PARAMETER;
7559         return eStatus;
7560     }
7561     int currentPass = GetCurrentPass();
7562     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7563     {
7564         eStatus = MOS_STATUS_INVALID_PARAMETER;
7565         return eStatus;
7566     }
7567 
7568     if (IsFirstPipe() && m_osInterface->bUsesPatchList)
7569     {
7570         CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
7571     }
7572 
7573     PMOS_COMMAND_BUFFER pCmdBuffer;
7574     if (m_osInterface->phasedSubmission)
7575     {
7576         m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
7577         return eStatus;
7578     }
7579     else
7580     {
7581         pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
7582     }
7583 
7584     if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) ||
7585         m_sizeOfVeBatchBuffer < requestedSize)
7586     {
7587         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
7588 
7589         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7590         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
7591         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
7592         allocParamsForBufferLinear.Format = Format_Buffer;
7593         allocParamsForBufferLinear.dwBytes = requestedSize;
7594         allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
7595 
7596         if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource))
7597         {
7598             if (pCmdBuffer->pCmdBase)
7599             {
7600                 m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource);
7601             }
7602             m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource);
7603         }
7604 
7605         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
7606             m_osInterface,
7607             &allocParamsForBufferLinear,
7608             &pCmdBuffer->OsResource));
7609 
7610         m_sizeOfVeBatchBuffer = requestedSize;
7611     }
7612 
7613     if (pCmdBuffer->pCmdBase == nullptr)
7614     {
7615         MOS_LOCK_PARAMS lockParams;
7616         MOS_ZeroMemory(&lockParams, sizeof(lockParams));
7617         lockParams.WriteOnly = true;
7618         pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams);
7619         pCmdBuffer->iRemaining                     = m_sizeOfVeBatchBuffer;
7620         pCmdBuffer->iOffset = 0;
7621         pCmdBuffer->is1stLvlBB = true;
7622 
7623         if (pCmdBuffer->pCmdBase == nullptr)
7624         {
7625             eStatus = MOS_STATUS_NULL_POINTER;
7626             return eStatus;
7627         }
7628     }
7629 
7630     return eStatus;
7631 }
7632 
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)7633 MOS_STATUS CodechalVdencHevcStateG12::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
7634 {
7635     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7636 
7637     CODECHAL_ENCODE_FUNCTION_ENTER;
7638 
7639     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7640     CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
7641 
7642     if (UseRenderCommandBuffer() || m_numPipe == 1)
7643     {
7644         // legacy mode
7645         m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
7646         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
7647         return eStatus;
7648     }
7649 
7650     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
7651 
7652     int currentPipe = GetCurrentPipe();
7653     if (currentPipe < 0 || currentPipe >= m_numPipe)
7654     {
7655         eStatus = MOS_STATUS_INVALID_PARAMETER;
7656         return eStatus;
7657     }
7658     int currentPass = GetCurrentPass();
7659     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7660     {
7661         eStatus = MOS_STATUS_INVALID_PARAMETER;
7662         return eStatus;
7663     }
7664 
7665     if (m_osInterface->phasedSubmission)
7666     {
7667         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1));
7668 
7669         CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
7670         if (IsLastPipe())
7671         {
7672             cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
7673         }
7674     }
7675     else
7676     {
7677         *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
7678     }
7679 
7680     if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
7681     {
7682         // Insert CP Prolog
7683         CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
7684         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
7685     }
7686     return eStatus;
7687 }
7688 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)7689 MOS_STATUS CodechalVdencHevcStateG12::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
7690 {
7691     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7692 
7693     CODECHAL_ENCODE_FUNCTION_ENTER;
7694 
7695     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7696 
7697     if (UseRenderCommandBuffer() || m_numPipe == 1)
7698     {
7699         // legacy mode
7700         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
7701         return eStatus;
7702     }
7703 
7704     int currentPipe = GetCurrentPipe();
7705     if (currentPipe < 0 || currentPipe >= m_numPipe)
7706     {
7707         eStatus = MOS_STATUS_INVALID_PARAMETER;
7708         return eStatus;
7709     }
7710     int currentPass = GetCurrentPass();
7711     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7712     {
7713         eStatus = MOS_STATUS_INVALID_PARAMETER;
7714         return eStatus;
7715     }
7716 
7717     if (m_osInterface->phasedSubmission)
7718     {
7719         m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1);
7720         m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
7721     }
7722     else
7723     {
7724         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
7725         m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
7726         m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
7727     }
7728 
7729     return eStatus;
7730 }
7731 
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)7732 MOS_STATUS CodechalVdencHevcStateG12::SubmitCommandBuffer(
7733     PMOS_COMMAND_BUFFER cmdBuffer,
7734     bool                bNullRendering)
7735 {
7736     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7737 
7738     CODECHAL_ENCODE_FUNCTION_ENTER;
7739 
7740     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7741 
7742     if (IsLastPass())
7743     {
7744         HalOcaInterface::On1stLevelBBEnd(*cmdBuffer, *m_osInterface);
7745     }
7746 
7747     if (UseRenderCommandBuffer() || m_numPipe == 1)
7748     {
7749         // legacy mode
7750         if (!UseRenderCommandBuffer())  // Set VE Hints for video contexts only
7751         {
7752             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
7753         }
7754         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
7755         return eStatus;
7756     }
7757 
7758     bool cmdBufferReadyForSubmit = IsLastPipe();
7759 
7760     // In STF, Hold the command buffer submission till last pass
7761     if (m_singleTaskPhaseSupported)
7762     {
7763         cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
7764     }
7765 
7766     if(!cmdBufferReadyForSubmit)
7767     {
7768         return eStatus;
7769     }
7770 
7771     int currentPass = GetCurrentPass();
7772     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7773     {
7774         eStatus = MOS_STATUS_INVALID_PARAMETER;
7775         return eStatus;
7776     }
7777 
7778     if (m_osInterface->phasedSubmission)
7779     {
7780         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
7781     }
7782     else
7783     {
7784         uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
7785 
7786         for (uint32_t i = 0; i < m_numPipe; i++)
7787         {
7788             PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
7789 
7790             if(cmdBuffer->pCmdBase)
7791             {
7792                 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
7793             }
7794 
7795             cmdBuffer->pCmdBase = 0;
7796             cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
7797         }
7798         m_sizeOfVeBatchBuffer = 0;
7799 
7800         if(eStatus == MOS_STATUS_SUCCESS)
7801         {
7802             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
7803             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
7804         }
7805     }
7806 
7807     return eStatus;
7808 }
7809 
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)7810 MOS_STATUS CodechalVdencHevcStateG12::SendPrologWithFrameTracking(
7811     PMOS_COMMAND_BUFFER         cmdBuffer,
7812     bool                  frameTrackingRequested,
7813     MHW_MI_MMIOREGISTERS *mmioRegister)
7814 {
7815     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7816 
7817     CODECHAL_ENCODE_FUNCTION_ENTER;
7818 
7819     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7820 
7821     CODECHAL_ENCODE_CHK_NULL_RETURN(m_hevcSeqParams);
7822 
7823     // Set flag bIsMdfLoad in remote gaming scenario to boost GPU frequency for low latency
7824     cmdBuffer->Attributes.bFrequencyBoost = (m_hevcSeqParams->ScenarioInfo == ESCENARIO_REMOTEGAMING);
7825 
7826     MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
7827 
7828     if (UseRenderCommandBuffer())
7829     {
7830         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
7831         return eStatus;
7832     }
7833 
7834 #ifdef _MMC_SUPPORTED
7835     CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
7836     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
7837 #endif
7838 
7839     if (!IsLastPipe())
7840     {
7841         return eStatus;
7842     }
7843 
7844     PMOS_COMMAND_BUFFER commandBufferInUse;
7845     if (m_realCmdBuffer.pCmdBase)
7846     {
7847         commandBufferInUse = &m_realCmdBuffer;
7848     }
7849     else
7850         if (cmdBuffer && cmdBuffer->pCmdBase)
7851         {
7852             commandBufferInUse = cmdBuffer;
7853         }
7854         else
7855         {
7856             eStatus = MOS_STATUS_INVALID_PARAMETER;
7857             return eStatus;
7858         }
7859 
7860     // initialize command buffer attributes
7861     commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
7862     commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
7863     commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
7864     commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
7865     commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
7866 
7867     if (frameTrackingRequested && m_frameTrackingEnabled)
7868     {
7869         commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
7870         commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
7871             &m_encodeStatusBuf.resStatusBuffer;
7872         commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
7873         // Set media frame tracking address offset(the offset from the encoder status buffer page)
7874         commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
7875     }
7876 
7877     MHW_GENERIC_PROLOG_PARAMS  genericPrologParams;
7878     MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
7879     genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
7880     genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
7881     genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
7882     genericPrologParams.dwStoreDataValue = m_storeData - 1;
7883 
7884     CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
7885 
7886     return eStatus;
7887 }
7888 
SetSliceStructs()7889 MOS_STATUS CodechalVdencHevcStateG12::SetSliceStructs()
7890 {
7891     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
7892     eStatus = CodechalEncodeHevcBase::SetSliceStructs();
7893     CODECHAL_ENCODE_CHK_COND_RETURN((m_lookaheadPass && !m_lowDelay), "RA B frame is not expected in lookahead pass.");
7894     m_numPassesInOnePipe                        = m_numPasses;
7895     m_numPasses                                 = (m_numPasses + 1) * m_numPipe - 1;
7896     return eStatus;
7897 }
7898 
AllocateTileStatistics()7899 MOS_STATUS CodechalVdencHevcStateG12::AllocateTileStatistics()
7900 {
7901     CODECHAL_ENCODE_FUNCTION_ENTER;
7902 
7903     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
7904 
7905     if (!m_hevcPicParams->tiles_enabled_flag)
7906     {
7907         return eStatus;
7908     }
7909 
7910     auto num_tile_rows    = m_hevcPicParams->num_tile_rows_minus1 + 1;
7911     auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7912     auto num_tiles = num_tile_rows * num_tile_columns;
7913 
7914     MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
7915     MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
7916     MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
7917 
7918     MOS_LOCK_PARAMS lockFlagsWriteOnly;
7919     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7920     lockFlagsWriteOnly.WriteOnly = true;
7921 
7922     // Set the maximum size based on frame level statistics.
7923     m_hevcStatsSize.uiTileSizeRecord     = CODECHAL_CACHELINE_SIZE;
7924     m_hevcStatsSize.uiHevcPakStatistics  = m_sizeOfHcpPakFrameStats;
7925     m_hevcStatsSize.uiVdencStatistics    = CODECHAL_HEVC_VDENC_STATS_SIZE;
7926     m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
7927 
7928     // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
7929     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
7930     m_hevcFrameStatsOffset.uiTileSizeRecord     = 0;  // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
7931     m_hevcFrameStatsOffset.uiHevcPakStatistics  = 0;
7932     m_hevcFrameStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
7933     m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
7934 
7935     // Frame level statistics
7936     m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu), CODECHAL_PAGE_SIZE);
7937 
7938     // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
7939     if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
7940     {
7941         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
7942         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7943         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
7944         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
7945         allocParamsForBufferLinear.Format = Format_Buffer;
7946         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
7947         allocParamsForBufferLinear.pBufName = "GEN12 HCP Aggregated Frame Statistics Streamout Buffer";
7948 
7949         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
7950             m_osInterface,
7951             &allocParamsForBufferLinear,
7952             &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
7953         m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
7954 
7955         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
7956             m_osInterface,
7957             &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
7958             &lockFlagsWriteOnly);
7959 
7960         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
7961         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
7962         m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
7963     }
7964 
7965     // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
7966     // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
7967     m_hevcTileStatsOffset.uiTileSizeRecord     = 0; // TileReord is in a separated resource
7968     m_hevcTileStatsOffset.uiHevcPakStatistics  = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer;
7969     m_hevcTileStatsOffset.uiVdencStatistics    = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
7970     m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
7971     // Combined statistics size for all tiles
7972     m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu, CODECHAL_PAGE_SIZE);
7973 
7974     // Tile size record size for all tiles
7975     m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
7976 
7977     if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
7978     {
7979         if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
7980         {
7981             m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
7982         }
7983         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
7984         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7985         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
7986         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
7987         allocParamsForBufferLinear.Format = Format_Buffer;
7988         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
7989         allocParamsForBufferLinear.pBufName = "GEN12 HCP Tile Level Statistics Streamout Buffer";
7990 
7991         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
7992             m_osInterface,
7993             &allocParamsForBufferLinear,
7994             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
7995         m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
7996 
7997         uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
7998             m_osInterface,
7999             &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
8000             &lockFlagsWriteOnly);
8001         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8002 
8003         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8004         m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
8005     }
8006 
8007     // Allocate the updated tile size buffer for PAK integration kernel
8008     if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
8009     {
8010         if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
8011         {
8012             m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
8013         }
8014         MOS_ALLOC_GFXRES_PARAMS  allocParamsForBufferLinear;
8015         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8016         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8017         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8018         allocParamsForBufferLinear.Format = Format_Buffer;
8019         allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
8020         allocParamsForBufferLinear.pBufName = "Tile Record buffer";
8021 
8022         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
8023                                                       m_osInterface,
8024                                                       &allocParamsForBufferLinear,
8025                                                       &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource),
8026             "Failed to create GEN12 Tile Record buffer");
8027 
8028         m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = allocParamsForBufferLinear.dwBytes;
8029     }
8030 
8031     // Only needed when tile & BRC is enabled, but the size is not changing at frame level
8032     // Move to more properiate place later
8033     if (Mos_ResourceIsNull(&m_resBrcDataBuffer))
8034     {
8035         uint8_t* data;
8036         MOS_ALLOC_GFXRES_PARAMS  allocParamsForBufferLinear;
8037 
8038         // Pak stitch DMEM
8039         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8040         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8041         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8042         allocParamsForBufferLinear.Format = Format_Buffer;
8043         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE);
8044         allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
8045         auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
8046 
8047         for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
8048         {
8049             for (auto i = 0; i < numOfPasses; i++)
8050             {
8051                 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
8052                     m_osInterface->pfnAllocateResource(
8053                         m_osInterface,
8054                         &allocParamsForBufferLinear,
8055                         &m_resHucPakStitchDmemBuffer[k][i]),
8056                     "Failed to allocate PAK Stitch Dmem Buffer.");
8057 
8058                 MOS_LOCK_PARAMS lockFlagsWriteOnly;
8059                 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8060                 lockFlagsWriteOnly.WriteOnly = 1;
8061 
8062                 data = (uint8_t*)m_osInterface->pfnLockResource(
8063                     m_osInterface,
8064                     &m_resHucPakStitchDmemBuffer[k][i],
8065                     &lockFlagsWriteOnly);
8066 
8067                 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8068 
8069                 MOS_ZeroMemory(
8070                     data,
8071                     allocParamsForBufferLinear.dwBytes);
8072 
8073                 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
8074             }
8075         }
8076 
8077         // BRC Data Buffer
8078         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_numTiles * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
8079         allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
8080 
8081         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
8082             m_osInterface->pfnAllocateResource(
8083                 m_osInterface,
8084                 &allocParamsForBufferLinear,
8085                 &m_resBrcDataBuffer),
8086             "Failed to allocate BRC Data Buffer Buffer.");
8087 
8088         MOS_LOCK_PARAMS lockFlags;
8089         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8090         lockFlags.WriteOnly = 1;
8091 
8092         data = (uint8_t*)m_osInterface->pfnLockResource(
8093             m_osInterface,
8094             &m_resBrcDataBuffer,
8095             &lockFlags);
8096 
8097         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8098 
8099         MOS_ZeroMemory(
8100             data,
8101             allocParamsForBufferLinear.dwBytes);
8102 
8103         m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
8104     }
8105 
8106     return eStatus;
8107 }
8108 
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)8109 MOS_STATUS CodechalVdencHevcStateG12::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
8110 {
8111     CODECHAL_ENCODE_FUNCTION_ENTER;
8112 
8113     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
8114 
8115     // encodeStatus is offset by 2 DWs in the resource
8116     uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
8117     for (auto i = 0; i < 6; i++)    // 64 bit SSE values for luma/ chroma channels need to be copied
8118     {
8119         MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
8120         MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
8121         miCpyMemMemParams.presSrc     = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
8122         miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t);    // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
8123         miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
8124         miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
8125         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
8126     }
8127     return eStatus;
8128 }
8129 
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)8130 void CodechalVdencHevcStateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
8131 {
8132     PCODECHAL_ENCODE_BUFFER tileRecordBuffer    = &m_tileRecordBuffer[m_virtualEngineBbIndex];
8133     bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
8134 
8135     MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
8136     indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
8137     indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
8138     indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
8139     indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
8140     indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
8141     indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
8142     indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
8143     indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
8144     indObjBaseAddrParams.dwPakTileSizeRecordOffset   = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
8145 }
8146 
HuCLookaheadInit()8147 MOS_STATUS CodechalVdencHevcStateG12::HuCLookaheadInit()
8148 {
8149     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8150 
8151     CODECHAL_ENCODE_FUNCTION_ENTER;
8152 
8153     m_firstTaskInPhase = !m_singleTaskPhaseSupported;
8154     m_lastTaskInPhase  = !m_singleTaskPhaseSupported;
8155 
8156     // set DMEM
8157     uint32_t initVbvFullness = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
8158     MOS_LOCK_PARAMS lockFlagsWriteOnly;
8159     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8160     lockFlagsWriteOnly.WriteOnly = true;
8161 
8162     auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
8163         m_osInterface, &m_vdencLaInitDmemBuffer, &lockFlagsWriteOnly);
8164     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
8165     MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
8166 
8167     uint8_t downscaleRatioIndicator = 2; // 4x downscaling
8168     if (m_hevcPicParams->DownScaleRatio.fields.X16Minus1_X == 15 && m_hevcPicParams->DownScaleRatio.fields.X16Minus1_Y == 15)
8169     {
8170         downscaleRatioIndicator = 0; // no downscaling
8171     }
8172 
8173     dmem->lookAheadFunc      = 0;
8174     dmem->lengthAhead        = m_lookaheadDepth;
8175     dmem->vbvBufferSize      = m_hevcSeqParams->VBVBufferSizeInBit / m_averageFrameSize;
8176     dmem->vbvInitialFullness = initVbvFullness / m_averageFrameSize;
8177     dmem->statsRecords       = m_numLaDataEntry;
8178     dmem->avgFrameSizeInByte = m_averageFrameSize >> 3;
8179     dmem->downscaleRatio     = downscaleRatioIndicator;
8180     dmem->enc_frame_width    = m_frameWidth;
8181     dmem->enc_frame_height   = m_frameHeight;
8182     dmem->codec_type         = 2;
8183     dmem->mbr_ratio          = (m_hevcSeqParams->TargetBitRate > 0 && m_hevcSeqParams->MaxBitRate >= m_hevcSeqParams->TargetBitRate) ?
8184                                m_hevcSeqParams->MaxBitRate * 100 / m_hevcSeqParams->TargetBitRate : 100;
8185 
8186     if (m_hevcSeqParams->bLookAheadPhase)
8187     {
8188         if (m_hevcSeqParams->GopRefDist == 1)
8189         {
8190             dmem->PGop = 4;
8191         }
8192         else
8193         {
8194             dmem->BGop   = m_hevcSeqParams->GopRefDist;
8195             dmem->maxGop = m_hevcSeqParams->GopPicSize;
8196         }
8197 
8198         dmem->GopOpt = m_hevcSeqParams->GopFlags.fields.StrictGop ? 2 : m_hevcSeqParams->GopFlags.fields.ClosedGop;
8199         dmem->AGop = m_hevcSeqParams->GopFlags.fields.AdaptiveGop;
8200         if (m_hevcSeqParams->GopFlags.fields.AdaptiveGop)
8201         {
8202             dmem->AGop_Threshold = 30;
8203         }
8204 
8205         dmem->maxGop = m_hevcSeqParams->MaxAdaptiveGopPicSize;
8206         dmem->minGop = m_hevcSeqParams->MinAdaptiveGopPicSize;
8207         dmem->adaptiveIDR = (uint8_t)m_lookaheadAdaptiveI;
8208     }
8209 
8210     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaInitDmemBuffer);
8211 
8212     // set HuC regions
8213     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8214     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8215     virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
8216     virtualAddrParams.regionParams[0].isWritable = true;
8217 
8218 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
8219     if (m_swLaMode)
8220     {
8221         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
8222             m_debugInterface,
8223             m_swLaMode,
8224             CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
8225             &m_vdencLaInitDmemBuffer,
8226             nullptr,
8227             &virtualAddrParams));
8228 
8229         return eStatus;
8230     }
8231 #endif
8232 
8233     MOS_COMMAND_BUFFER cmdBuffer;
8234     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8235 
8236     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
8237     {
8238         // Send command buffer header at the beginning (OS dependent)
8239         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
8240         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8241     }
8242 
8243     // load kernel from WOPCM into L2 storage RAM
8244     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8245     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8246     imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
8247 
8248     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8249 
8250     // pipe mode select
8251     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8252     pipeModeSelectParams.Mode = m_mode;
8253     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8254 
8255     // set HuC DMEM param
8256     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8257     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8258     dmemParams.presHucDataSource = &m_vdencLaInitDmemBuffer;
8259     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8260     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8261     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8262     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
8263     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8264     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8265 
8266     // wait Huc completion (use HEVC bit for now)
8267     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8268     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8269     vdPipeFlushParams.Flags.bFlushHEVC = 1;
8270     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8271     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8272 
8273     // Flush the engine to ensure memory written out
8274     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8275     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8276     flushDwParams.bVideoPipelineCacheInvalidate = true;
8277     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8278 
8279     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8280     {
8281         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8282     }
8283 
8284     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8285 
8286     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8287     {
8288         bool renderingFlags = m_videoContextUsesNullHw;
8289 
8290         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8291     }
8292 
8293     return eStatus;
8294 }
8295 
HuCLookaheadUpdate()8296 MOS_STATUS CodechalVdencHevcStateG12::HuCLookaheadUpdate()
8297 {
8298     uint8_t currentPass = (uint8_t)GetCurrentPass();
8299     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8300 
8301     CODECHAL_ENCODE_FUNCTION_ENTER;
8302 
8303     m_firstTaskInPhase = !m_singleTaskPhaseSupported;
8304     m_lastTaskInPhase  = (currentPass == m_numPasses);
8305 
8306     // set DMEM
8307     MOS_LOCK_PARAMS lockFlagsWriteOnly;
8308     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8309     lockFlagsWriteOnly.WriteOnly = true;
8310 
8311     auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
8312         m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
8313     CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
8314     MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
8315 
8316     dmem->lookAheadFunc = 1;
8317     dmem->validStatsRecords = m_numValidLaRecords;
8318     dmem->offset = (m_numLaDataEntry + m_currLaDataIdx + 1 - m_numValidLaRecords) % m_numLaDataEntry;
8319     dmem->cqmQpThreshold = m_cqmQpThreshold;
8320     dmem->currentPass = currentPass;
8321 
8322     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
8323 
8324     // set HuC regions
8325     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8326     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8327     virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
8328     virtualAddrParams.regionParams[0].isWritable = true;
8329     virtualAddrParams.regionParams[1].presRegion = &m_vdencLaStatsBuffer;
8330     virtualAddrParams.regionParams[2].presRegion = &m_vdencLaDataBuffer;
8331     virtualAddrParams.regionParams[2].isWritable = true;
8332 
8333 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
8334     if (m_swLaMode)
8335     {
8336         bool isLaAnalysisRequired = true;
8337         MOS_LOCK_PARAMS lockFlags;
8338         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8339         lockFlags.ReadOnly = true;
8340 
8341         if (!IsFirstPass())
8342         {
8343             uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resPakMmioBuffer, &lockFlags);
8344             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8345             isLaAnalysisRequired = (*data == CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK);
8346             m_osInterface->pfnUnlockResource(m_osInterface, &m_resPakMmioBuffer);
8347         }
8348 
8349         if (isLaAnalysisRequired)
8350         {
8351             CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
8352                 m_debugInterface,
8353                 m_swLaMode,
8354                 CODECHAL_MEDIA_STATE_BRC_UPDATE,
8355                 &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
8356                 &m_resPakMmioBuffer,
8357                 &virtualAddrParams));
8358 
8359             EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
8360             uint32_t baseOffset = (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize);
8361 
8362             CodechalVdencHevcLaData *data = (CodechalVdencHevcLaData *)m_osInterface->pfnLockResource(m_osInterface, &m_vdencLaDataBuffer, &lockFlags);
8363             CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8364 
8365             LookaheadReport *lookaheadStatus = (LookaheadReport *)(encodeStatusBuf.pEncodeStatus + baseOffset + encodeStatusBuf.dwLookaheadStatusOffset);
8366             lookaheadStatus->targetFrameSize = data[dmem->offset].targetFrameSize;
8367             lookaheadStatus->targetBufferFulness = data[dmem->offset].targetBufferFulness;
8368             lookaheadStatus->encodeHints = data[dmem->offset].encodeHints;
8369             lookaheadStatus->pyramidDeltaQP = data[dmem->offset].pyramidDeltaQP;
8370 
8371             m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaDataBuffer);
8372         }
8373 
8374         return eStatus;
8375     }
8376 #endif
8377 
8378     MOS_COMMAND_BUFFER cmdBuffer;
8379     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8380 
8381     if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
8382     {
8383         // Send command buffer header at the beginning (OS dependent)
8384         bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
8385         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8386     }
8387 
8388     if (!IsFirstPass() && m_firstTaskInPhase)
8389     {
8390         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
8391         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
8392         MOS_ZeroMemory(
8393             &miConditionalBatchBufferEndParams,
8394             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
8395         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
8396             &m_resPakMmioBuffer;
8397         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
8398             &cmdBuffer,
8399             &miConditionalBatchBufferEndParams));
8400     }
8401 
8402     CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
8403 
8404     // load kernel from WOPCM into L2 storage RAM
8405     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8406     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8407     imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
8408 
8409     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8410 
8411     // pipe mode select
8412     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8413     pipeModeSelectParams.Mode = m_mode;
8414     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8415 
8416     // set HuC DMEM param
8417     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8418     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8419     dmemParams.presHucDataSource = &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass];
8420     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8421     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8422     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8423     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
8424     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8425     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8426 
8427     // wait Huc completion (use HEVC bit for now)
8428     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8429     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8430     vdPipeFlushParams.Flags.bFlushHEVC = 1;
8431     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8432     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8433 
8434     // Flush the engine to ensure memory written out
8435     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8436     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8437     flushDwParams.bVideoPipelineCacheInvalidate = true;
8438     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8439 
8440     if (IsFirstPass())
8441     {
8442         // Write HUC_STATUS mask: DW1 (mask value)
8443         MHW_MI_STORE_DATA_PARAMS storeDataParams;
8444         MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8445         storeDataParams.pOsResource = &m_resPakMmioBuffer;
8446         storeDataParams.dwResourceOffset = sizeof(uint32_t);
8447         storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
8448         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
8449 
8450         // store HUC_STATUS register: DW0 (actual value)
8451         CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8452         auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8453         MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
8454         MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
8455         storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
8456         storeRegParams.dwOffset = 0;
8457         storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
8458         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
8459     }
8460 
8461     // Write lookahead status to encode status buffer
8462     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
8463     EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
8464     uint32_t baseOffset =
8465         (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
8466     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
8467     miCpyMemMemParams.presSrc = &m_vdencLaDataBuffer;
8468     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, encodeHints);
8469     miCpyMemMemParams.presDst = &encodeStatusBuf.resStatusBuffer;
8470     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, encodeHints);
8471     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8472     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetFrameSize);
8473     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetFrameSize);
8474     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8475     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetBufferFulness);
8476     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetBufferFulness);
8477     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8478     miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, pyramidDeltaQP);
8479     miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, pyramidDeltaQP);
8480     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8481 
8482     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8483     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8484 
8485     CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
8486 
8487     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8488     {
8489         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8490     }
8491 
8492     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8493 
8494     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8495     {
8496         bool renderingFlags = m_videoContextUsesNullHw;
8497         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8498     }
8499 
8500     return eStatus;
8501 }
8502 
AnalyzeLookaheadStats()8503 MOS_STATUS CodechalVdencHevcStateG12::AnalyzeLookaheadStats()
8504 {
8505     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8506 
8507     CODECHAL_ENCODE_FUNCTION_ENTER;
8508 
8509     if(IsFirstPass())
8510     {
8511         m_numValidLaRecords++;
8512     }
8513 
8514     if (m_lookaheadInit)
8515     {
8516         CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadInit());
8517         m_lookaheadInit = false;
8518     }
8519 
8520     CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
8521     if (IsLastPass() && (m_numValidLaRecords >= m_lookaheadDepth))
8522     {
8523         m_numValidLaRecords--;
8524         m_lookaheadReport = true;
8525     }
8526 
8527     int32_t currentPass = GetCurrentPass();
8528     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8529         &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
8530         CodechalDbgAttr::attrVdencOutput,
8531         "_LookaheadDmem",
8532         sizeof(CodechalVdencHevcLaDmem),
8533         0,
8534         CODECHAL_NUM_MEDIA_STATES)));
8535 
8536     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8537         &m_vdencLaDataBuffer,
8538         CodechalDbgAttr::attrVdencOutput,
8539         "_LookaheadData",
8540         m_brcLooaheadDataBufferSize,
8541         0,
8542         CODECHAL_NUM_MEDIA_STATES)));
8543 
8544     CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8545         &m_vdencLaHistoryBuffer,
8546         CodechalDbgAttr::attrVdencOutput,
8547         "_LookaheadHistory",
8548         m_LaHistoryBufSize,
8549         0,
8550         CODECHAL_NUM_MEDIA_STATES)));
8551 
8552     if (m_hevcPicParams->bLastPicInStream)
8553     {
8554         // Flush the last frames
8555         while (m_numValidLaRecords > 0)
8556         {
8557             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
8558             m_numValidLaRecords--;
8559         }
8560     }
8561 
8562     return eStatus;
8563 }
8564 
HuCBrcInitReset()8565 MOS_STATUS CodechalVdencHevcStateG12::HuCBrcInitReset()
8566 {
8567     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8568 
8569     CODECHAL_ENCODE_FUNCTION_ENTER;
8570 
8571 #if (_DEBUG || _RELEASE_INTERNAL) && _ENCODE_VDENC_RESERVED
8572     if (m_swBrcMode != nullptr && !m_enableTileReplay && !m_hevcVdencWeightedPredEnabled)
8573     {
8574         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
8575 
8576         MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8577         MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8578         virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
8579         virtualAddrParams.regionParams[0].isWritable = true;
8580 
8581         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcVdencSwBrcImpl(
8582             m_debugInterface,
8583             m_swBrcMode,
8584             CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
8585             !m_brcInit,
8586             &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx],
8587             &m_resPakMmioBuffer,
8588             &virtualAddrParams));
8589 
8590         CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
8591 
8592         return eStatus;
8593     }
8594 #endif
8595 
8596     MOS_COMMAND_BUFFER cmdBuffer;
8597     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8598 
8599     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && (m_numPipe == 1))
8600     {
8601         // Send command buffer header at the beginning (OS dependent)
8602         bool requestFrameTracking = m_singleTaskPhaseSupported ?
8603             m_firstTaskInPhase : 0;
8604         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8605     }
8606 
8607     // load kernel from WOPCM into L2 storage RAM
8608     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8609     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8610     imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
8611 
8612     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8613 
8614     // pipe mode select
8615     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8616     pipeModeSelectParams.Mode = m_mode;
8617     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8618 
8619     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
8620 
8621     // set HuC DMEM param
8622     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8623     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8624     dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
8625     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8626     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8627     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8628 
8629     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8630     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8631     virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
8632     virtualAddrParams.regionParams[0].isWritable = true;
8633     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
8634 
8635     // Store HUC_STATUS2 register bit 6 before HUC_Start command
8636     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
8637     // (HUC_Start command with last start bit set).
8638     CODECHAL_DEBUG_TOOL(
8639         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
8640     )
8641 
8642     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8643 
8644     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8645 
8646     // wait Huc completion (use HEVC bit for now)
8647     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8648     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8649     vdPipeFlushParams.Flags.bFlushHEVC = 1;
8650     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8651     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8652 
8653     // Flush the engine to ensure memory written out
8654     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8655     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8656     flushDwParams.bVideoPipelineCacheInvalidate = true;
8657     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8658 
8659     CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8660     auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8661     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, true));
8662     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
8663 
8664     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
8665     {
8666         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8667     }
8668 
8669     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8670 
8671     if (!m_singleTaskPhaseSupported)
8672     {
8673         bool renderingFlags = m_videoContextUsesNullHw;
8674 
8675         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
8676             &cmdBuffer,
8677             CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
8678             nullptr)));
8679         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8680     }
8681 
8682     CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
8683     return eStatus;
8684 }
8685 
HuCBrcUpdate()8686 MOS_STATUS CodechalVdencHevcStateG12::HuCBrcUpdate()
8687 {
8688     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8689 
8690     CODECHAL_ENCODE_FUNCTION_ENTER;
8691 
8692     *m_pipeBufAddrParams = {};
8693     if (m_pictureCodingType != I_TYPE)
8694     {
8695         for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
8696         {
8697             if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
8698             {
8699                 continue;
8700             }
8701 
8702             uint8_t idx = m_picIdx[i].ucPicIdx;
8703             CodecHalGetResourceInfo(m_osInterface, &(m_refList[idx]->sRefReconBuffer));
8704 
8705             uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i];
8706             m_pipeBufAddrParams->presReferences[frameStoreId] = &(m_refList[idx]->sRefReconBuffer.OsResource);
8707         }
8708     }
8709 
8710     if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
8711     {
8712         // I frame is much simpler
8713         if (m_pictureCodingType == I_TYPE)
8714         {
8715             m_slotForRecNotFiltered = 0;
8716         }
8717         // LDB
8718         else
8719         {
8720             unsigned int i;
8721 
8722             // Find one available slot
8723             for (i = 0; i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC; i++)
8724             {
8725                 if (m_pipeBufAddrParams->presReferences[i] == nullptr)
8726                 {
8727                     break;
8728                 }
8729             }
8730 
8731             CODECHAL_ENCODE_ASSERT(i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC);
8732 
8733             //record the slot for HCP_REF_IDX_STATE
8734             m_slotForRecNotFiltered = (unsigned char)i;
8735         }
8736     }
8737 
8738     int32_t currentPass = GetCurrentPass();
8739     if (currentPass < 0)
8740     {
8741         eStatus = MOS_STATUS_INVALID_PARAMETER;
8742         return eStatus;
8743     }
8744 
8745     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
8746 
8747 #if (_DEBUG || _RELEASE_INTERNAL) && _ENCODE_VDENC_RESERVED
8748     if (m_swBrcMode != nullptr && !m_enableTileReplay && !m_hevcVdencWeightedPredEnabled)
8749     {
8750         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
8751         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
8752         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
8753 
8754         CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
8755 
8756         CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcVdencSwBrcImpl(
8757             m_debugInterface,
8758             m_swBrcMode,
8759             CODECHAL_MEDIA_STATE_BRC_UPDATE,
8760             (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW),
8761             &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_currPass],
8762             &m_resPakMmioBuffer,
8763             &m_virtualAddrParams));
8764 
8765         CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
8766 
8767         return eStatus;
8768     }
8769 #endif
8770 
8771     MOS_COMMAND_BUFFER cmdBuffer;
8772     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8773 
8774     if (((!m_singleTaskPhaseSupported) || ((m_firstTaskInPhase) && (!m_brcInit))) && (m_numPipe == 1))
8775     {
8776         // Send command buffer header at the beginning (OS dependent)
8777         bool requestFrameTracking = m_singleTaskPhaseSupported ?
8778             m_firstTaskInPhase : 0;
8779         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8780     }
8781 
8782     // load kernel from WOPCM into L2 storage RAM
8783     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8784     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8785 
8786     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)  // Low Delay BRC
8787     {
8788         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
8789     }
8790     else
8791     {
8792         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
8793     }
8794 
8795     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8796 
8797     // pipe mode select
8798     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8799     pipeModeSelectParams.Mode = m_mode;
8800     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8801 
8802     // DMEM set
8803     m_CurrentPassForOverAll = 0;
8804     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
8805 
8806     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8807     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8808     dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
8809     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8810     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8811 
8812     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8813 
8814     // Set Const Data buffer
8815     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
8816 
8817     // Add Virtual addr
8818     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
8819     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));
8820 
8821     // Store HUC_STATUS2 register bit 6 before HUC_Start command
8822     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
8823     // (HUC_Start command with last start bit set).
8824     CODECHAL_DEBUG_TOOL(
8825         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
8826     )
8827 
8828     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8829 
8830     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8831 
8832     // wait Huc completion (use HEVC bit for now)
8833     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8834     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8835     vdPipeFlushParams.Flags.bFlushHEVC = 1;
8836     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8837     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8838 
8839     // Flush the engine to ensure memory written out
8840     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8841     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8842     flushDwParams.bVideoPipelineCacheInvalidate = true;
8843     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8844 
8845     // Write HUC_STATUS mask: DW1 (mask value)
8846     MHW_MI_STORE_DATA_PARAMS storeDataParams;
8847     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8848     storeDataParams.pOsResource = &m_resPakMmioBuffer;
8849     storeDataParams.dwResourceOffset = sizeof(uint32_t);
8850     storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
8851     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
8852 
8853     // store HUC_STATUS register: DW0 (actual value)
8854     CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8855     auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8856     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
8857     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
8858     storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
8859     storeRegParams.dwOffset = 0;
8860     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
8861     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
8862 
8863     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, true));
8864     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
8865 
8866     // DW0 & DW1 will considered together for conditional batch buffer end cmd later
8867     if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
8868     {
8869         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8870     }
8871 
8872     // HuC Input
8873     CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
8874 
8875     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8876 
8877     if (!m_singleTaskPhaseSupported)
8878     {
8879         bool renderingFlags = m_videoContextUsesNullHw;
8880 
8881         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
8882             &cmdBuffer,
8883             CODECHAL_MEDIA_STATE_BRC_UPDATE,
8884             nullptr)));
8885         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8886     }
8887 
8888     // HuC Output
8889     CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
8890 
8891     return eStatus;
8892 }
8893 
HuCBrcTileRowUpdate(PMOS_COMMAND_BUFFER cmdBuffer)8894 MOS_STATUS CodechalVdencHevcStateG12::HuCBrcTileRowUpdate(PMOS_COMMAND_BUFFER cmdBuffer)
8895 {
8896     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8897 
8898     CODECHAL_ENCODE_FUNCTION_ENTER;
8899 
8900     MOS_LOCK_PARAMS lockFlags;
8901     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8902     lockFlags.WriteOnly = true;
8903 
8904     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource), &lockFlags);
8905     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8906 
8907     MOS_COMMAND_BUFFER tileRowBRCBatchBuf;
8908     MOS_ZeroMemory(&tileRowBRCBatchBuf, sizeof(tileRowBRCBatchBuf));
8909     tileRowBRCBatchBuf.pCmdBase = tileRowBRCBatchBuf.pCmdPtr = (uint32_t *)data;
8910     tileRowBRCBatchBuf.iRemaining = m_hwInterface->m_hucCommandBufferSize;
8911 
8912     // Add batch buffer start for tile row BRC batch
8913     HalOcaInterface::OnSubLevelBBStart(*cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource, 0, true, 0);
8914     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(cmdBuffer, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow]));
8915 
8916     // load kernel from WOPCM into L2 storage RAM
8917     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8918     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8919 
8920     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)  // Low Delay BRC
8921     {
8922         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
8923     }
8924     else
8925     {
8926         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
8927     }
8928 
8929     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&tileRowBRCBatchBuf, &imemParams));
8930 
8931     // pipe mode select
8932     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8933     pipeModeSelectParams.Mode = m_mode;
8934     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&tileRowBRCBatchBuf, &pipeModeSelectParams));
8935 
8936     // DMEM set
8937     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
8938 
8939     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8940     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8941     dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_CurrentPassForOverAll]);
8942     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8943     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8944 
8945     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&tileRowBRCBatchBuf, &dmemParams));
8946 
8947     // Set Const Data buffer
8948     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
8949 
8950     // Add Virtual addr
8951     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8952     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCTileRowBrcUpdate(&virtualAddrParams));
8953     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&tileRowBRCBatchBuf, &virtualAddrParams));
8954 
8955     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
8956 
8957     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&tileRowBRCBatchBuf, true));
8958 
8959     // wait Huc completion (use HEVC bit for now)
8960     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8961     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8962     vdPipeFlushParams.Flags.bFlushHEVC = 1;
8963     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8964     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileRowBRCBatchBuf, &vdPipeFlushParams));
8965 
8966     // Flush the engine to ensure memory written out
8967     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8968     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8969     flushDwParams.bVideoPipelineCacheInvalidate = true;
8970     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&tileRowBRCBatchBuf, &flushDwParams));
8971 
8972     // Write HUC_STATUS mask: DW1 (mask value)
8973     MHW_MI_STORE_DATA_PARAMS storeDataParams;
8974     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8975     storeDataParams.pOsResource = &m_resPakMmioBuffer;
8976     storeDataParams.dwResourceOffset = sizeof(uint32_t);
8977     storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
8978     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&tileRowBRCBatchBuf, &storeDataParams));
8979 
8980     // store HUC_STATUS register: DW0 (actual value)
8981     CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8982     auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8983     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
8984     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
8985     storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
8986     storeRegParams.dwOffset = 0;
8987     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
8988     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&tileRowBRCBatchBuf, &storeRegParams));
8989 
8990     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &tileRowBRCBatchBuf, true));
8991     CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&tileRowBRCBatchBuf));
8992 
8993     // Set the tile row BRC update sync semaphore
8994     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8995     storeDataParams.pOsResource = &m_resTileRowBRCsyncSemaphore;
8996     storeDataParams.dwValue     = 0xFF;
8997     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&tileRowBRCBatchBuf, &storeDataParams));
8998 
8999     (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->iCurrent = tileRowBRCBatchBuf.iOffset;
9000     (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->iRemaining = tileRowBRCBatchBuf.iRemaining;
9001     (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->pData = data;
9002     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow]));
9003 
9004     if (data)
9005     {
9006         m_osInterface->pfnUnlockResource(m_osInterface, &(m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource));
9007     }
9008 
9009     return eStatus;
9010 }
9011 
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)9012 MOS_STATUS CodechalVdencHevcStateG12::UpdateCmdBufAttribute(
9013     PMOS_COMMAND_BUFFER cmdBuffer,
9014     bool                renderEngineInUse)
9015 {
9016     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9017 
9018     // should not be there. Will remove it in the next change
9019     CODECHAL_ENCODE_FUNCTION_ENTER;
9020     if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
9021     {
9022         PMOS_CMD_BUF_ATTRI_VE attriExt =
9023             (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
9024 
9025         memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
9026         attriExt->bUseVirtualEngineHint =
9027             attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
9028     }
9029 
9030     return eStatus;
9031 }
9032 
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)9033 MOS_STATUS CodechalVdencHevcStateG12::AddMediaVfeCmd(
9034     PMOS_COMMAND_BUFFER cmdBuffer,
9035     SendKernelCmdsParams *params)
9036 {
9037     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
9038 
9039     MHW_VFE_PARAMS_G12 vfeParams = {};
9040     vfeParams.pKernelState              = params->pKernelState;
9041     vfeParams.eVfeSliceDisable          = MHW_VFE_SLICE_ALL;
9042     vfeParams.dwMaximumNumberofThreads  = m_encodeVfeMaxThreads;
9043     vfeParams.bFusedEuDispatch          = false; // legacy mode
9044 
9045     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
9046 
9047     return MOS_STATUS_SUCCESS;
9048 }
9049 
SetStreaminDataPerLcu(PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)9050 void CodechalVdencHevcStateG12::SetStreaminDataPerLcu(
9051     PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
9052     void* streaminData)
9053 {
9054     CODECHAL_ENCODE_FUNCTION_ENTER;
9055     PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G12 data = (PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G12)streaminData;
9056     if (streaminParams->setQpRoiCtrl)
9057     {
9058         if (m_vdencNativeROIEnabled || m_brcAdaptiveRegionBoostEnable)
9059         {
9060             data->DW0.RoiCtrl = streaminParams->roiCtrl;
9061         }
9062         else
9063         {
9064             data->DW7.QpEnable = 0xf;
9065             data->DW14.ForceQp_0 = streaminParams->forceQp[0];
9066             data->DW14.ForceQp_1 = streaminParams->forceQp[1];
9067             data->DW14.ForceQp_2 = streaminParams->forceQp[2];
9068             data->DW14.ForceQp_3 = streaminParams->forceQp[3];
9069         }
9070     }
9071     else
9072     {
9073         data->DW0.MaxTuSize = streaminParams->maxTuSize;
9074         data->DW0.MaxCuSize = streaminParams->maxCuSize;
9075         data->DW0.NumImePredictors = streaminParams->numImePredictors;
9076         data->DW0.PuTypeCtrl = streaminParams->puTypeCtrl;
9077         data->DW6.NumMergeCandidateCu64x64 = streaminParams->numMergeCandidateCu64x64;
9078         data->DW6.NumMergeCandidateCu32x32 = streaminParams->numMergeCandidateCu32x32;
9079         data->DW6.NumMergeCandidateCu16x16 = streaminParams->numMergeCandidateCu16x16;
9080         data->DW6.NumMergeCandidateCu8x8 = streaminParams->numMergeCandidateCu8x8;
9081     }
9082 }
9083 
GetTileInfo(uint32_t xPosition,uint32_t yPosition,uint32_t * tileId,uint32_t * tileEndLCUX,uint32_t * tileEndLCUY)9084 void CodechalVdencHevcStateG12::GetTileInfo(
9085     uint32_t xPosition,
9086     uint32_t yPosition,
9087     uint32_t* tileId,
9088     uint32_t* tileEndLCUX,
9089     uint32_t* tileEndLCUY)
9090 {
9091     *tileId = 0;
9092     uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
9093     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9094 
9095     for (uint8_t i = 0; i < m_numTiles; i++)
9096     {
9097         uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[i].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9098         uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[i].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9099         *tileEndLCUX = tileParams[i].TileStartLCUX + tileWidthInLCU;
9100         *tileEndLCUY = tileParams[i].TileStartLCUY + tileHeightInLCU;
9101 
9102         if (xPosition >= (tileParams[i].TileStartLCUX * 2) &&
9103             yPosition >= (tileParams[i].TileStartLCUY * 2) &&
9104             xPosition < (*tileEndLCUX * 2) &&
9105             yPosition < (*tileEndLCUY * 2))
9106         {
9107             *tileId = i;
9108             break;
9109         }
9110     }
9111 }
9112 
PrepareVDEncStreamInData()9113 MOS_STATUS CodechalVdencHevcStateG12::PrepareVDEncStreamInData()
9114 {
9115     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9116 
9117     CODECHAL_ENCODE_FUNCTION_ENTER;
9118 
9119     if (m_lookaheadPass && m_firstFrame)
9120     {
9121         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupForceIntraStreamIn(&m_resVdencStreamInBuffer[0]));
9122     }
9123 
9124     if (m_hevcPicParams->tiles_enabled_flag)
9125     {
9126         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams[m_virtualEngineBbIndex]));
9127     }
9128     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::PrepareVDEncStreamInData());
9129 
9130     return eStatus;
9131 }
9132 
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)9133 void CodechalVdencHevcStateG12::SetStreaminDataPerRegion(
9134     uint32_t streamInWidth,
9135     uint32_t top,
9136     uint32_t bottom,
9137     uint32_t left,
9138     uint32_t right,
9139     PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
9140     void* streaminData)
9141 {
9142     CODECHAL_ENCODE_FUNCTION_ENTER;
9143 
9144     if (!m_hevcPicParams->tiles_enabled_flag)
9145     {
9146         CodechalVdencHevcState::SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, streaminParams, streaminData);
9147         return;
9148     }
9149 
9150     uint8_t* data = (uint8_t*)streaminData;
9151     uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
9152     uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
9153     GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
9154 
9155     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9156 
9157     for (auto y = top; y < bottom; y++)
9158     {
9159         for (auto x = left; x < right; x++)
9160         {
9161             uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
9162 
9163             if (x < (tileParams[tileId].TileStartLCUX * 2) ||
9164                 y < (tileParams[tileId].TileStartLCUY * 2) ||
9165                 x >= (tileEndLCUX * 2) ||
9166                 y >= (tileEndLCUY * 2))
9167             {
9168                 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
9169             }
9170             streamInBaseOffset = tileParams[tileId].TileStreaminOffset;
9171 
9172             auto xPositionInTile = x - (tileParams[tileId].TileStartLCUX * 2);
9173             auto yPositionInTile = y - (tileParams[tileId].TileStartLCUY * 2);
9174             auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9175 
9176             StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
9177 
9178             SetStreaminDataPerLcu(streaminParams, data + (streamInBaseOffset + offset + xyOffset) * 64);
9179         }
9180     }
9181 }
9182 
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)9183 void CodechalVdencHevcStateG12::SetBrcRoiDeltaQpMap(
9184     uint32_t streamInWidth,
9185     uint32_t top,
9186     uint32_t bottom,
9187     uint32_t left,
9188     uint32_t right,
9189     uint8_t regionId,
9190     PDeltaQpForROI deltaQpMap)
9191 {
9192 
9193     CODECHAL_ENCODE_FUNCTION_ENTER;
9194 
9195     if (!m_hevcPicParams->tiles_enabled_flag)
9196     {
9197         CodechalVdencHevcState::SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, regionId, deltaQpMap);
9198         return;
9199     }
9200 
9201     uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
9202     uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
9203     GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
9204 
9205     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9206 
9207     for (auto y = top; y < bottom; y++)
9208     {
9209         for (auto x = left; x < right; x++)
9210         {
9211             uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
9212 
9213             if (x < (tileParams[tileId].TileStartLCUX * 2) ||
9214                 y < (tileParams[tileId].TileStartLCUY * 2) ||
9215                 x >= (tileEndLCUX * 2) ||
9216                 y >= (tileEndLCUY * 2))
9217             {
9218                 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
9219             }
9220             streamInBaseOffset = tileParams[tileId].TileStreaminOffset;
9221 
9222             auto xPositionInTile = x - (tileParams[tileId].TileStartLCUX * 2);
9223             auto yPositionInTile = y - (tileParams[tileId].TileStartLCUY * 2);
9224             auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9225 
9226             StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
9227 
9228             (deltaQpMap + (streamInBaseOffset + offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
9229         }
9230     }
9231 }
9232 
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)9233 MOS_STATUS CodechalVdencHevcStateG12::SetAndPopulateVEHintParams(
9234     PMOS_COMMAND_BUFFER  cmdBuffer)
9235 {
9236     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
9237 
9238     CODECHAL_ENCODE_FUNCTION_ENTER;
9239 
9240     if (!MOS_VE_SUPPORTED(m_osInterface))
9241     {
9242         return eStatus;
9243     }
9244 
9245     CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
9246     MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
9247 
9248     if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
9249     {
9250         scalSetParms.bNeedSyncWithPrevious = true;
9251     }
9252 
9253     int32_t currentPass = GetCurrentPass();
9254     if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9255     {
9256         eStatus = MOS_STATUS_INVALID_PARAMETER;
9257         return eStatus;
9258     }
9259     uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
9260     if (m_numPipe >= 2)
9261     {
9262         for (auto i = 0; i < m_numPipe; i++)
9263         {
9264             scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
9265         }
9266     }
9267 
9268     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
9269     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9270     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
9271 
9272     return eStatus;
9273 }
9274 
9275 #if USE_CODECHAL_DEBUG_TOOL
DumpVdencOutputs()9276 MOS_STATUS CodechalVdencHevcStateG12::DumpVdencOutputs()
9277 {
9278     CODECHAL_ENCODE_FUNCTION_ENTER;
9279 
9280     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::DumpVdencOutputs());
9281 
9282     if (m_hevcPicParams->tiles_enabled_flag)
9283     {
9284         PMOS_RESOURCE presVdencTileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
9285         auto          num_tiles                     = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
9286         auto          vdencStatsSizeAllTiles        = num_tiles * m_vdencBrcStatsBufferSize;
9287         auto          vdencStatsOffset              = m_hevcTileStatsOffset.uiVdencStatistics;
9288         auto          pakStatsSizeAllTiles          = num_tiles * 9 * CODECHAL_CACHELINE_SIZE;
9289         auto          pakStatsOffset                = m_hevcTileStatsOffset.uiHevcPakStatistics;
9290 
9291         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9292             presVdencTileStatisticsBuffer,
9293             CodechalDbgAttr::attrVdencOutput,
9294             "_TileVDEncStats",
9295             vdencStatsSizeAllTiles,
9296             vdencStatsOffset,
9297             CODECHAL_NUM_MEDIA_STATES));
9298 
9299         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9300             presVdencTileStatisticsBuffer,
9301             CodechalDbgAttr::attrPakOutput,
9302             "_TilePAKStats",
9303             pakStatsSizeAllTiles,
9304             pakStatsOffset,
9305             CODECHAL_NUM_MEDIA_STATES));
9306 
9307         // Slice Size Conformance
9308         if (m_hevcSeqParams->SliceSizeControl)
9309         {
9310             PMOS_RESOURCE presLcuBaseAddressBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
9311             auto          sliceStreamoutOffset     = m_hevcTileStatsOffset.uiHevcSliceStreamout;
9312             uint32_t size = m_numLcu * CODECHAL_CACHELINE_SIZE;
9313             // Slice Size StreamOut Surface
9314             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9315                 presLcuBaseAddressBuffer,
9316                 CodechalDbgAttr::attrVdencOutput,
9317                 "_SliceSize",
9318                 size,
9319                 sliceStreamoutOffset,
9320                 CODECHAL_NUM_MEDIA_STATES));
9321         }
9322     }
9323 
9324     return MOS_STATUS_SUCCESS;
9325 }
9326 
DumpHucDebugOutputBuffers()9327 MOS_STATUS CodechalVdencHevcStateG12::DumpHucDebugOutputBuffers()
9328 {
9329     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9330 
9331     // Virtual Engine does only one submit per pass. Dump all HuC debug outputs
9332     bool dumpDebugBuffers = IsLastPipe() && (m_numPipe > 1);
9333     if (m_singleTaskPhaseSupported)
9334     {
9335         dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
9336     }
9337 
9338     if (dumpDebugBuffers)
9339     {
9340         CODECHAL_DEBUG_TOOL(
9341             DumpHucPakIntegrate();
9342             DumpHucCqp();
9343            )
9344     }
9345 
9346     return eStatus;
9347 }
9348 
DumpHucPakIntegrate()9349 MOS_STATUS CodechalVdencHevcStateG12::DumpHucPakIntegrate()
9350 {
9351     int32_t currentPass = GetCurrentPass();
9352     // HuC Input
9353     // HuC DMEM
9354     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
9355         &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
9356         MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE),
9357         currentPass,
9358         hucRegionDumpPakIntegrate));
9359 
9360     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9361         &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
9362         0,
9363         m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
9364         0,
9365         "",
9366         true,
9367         currentPass,
9368         hucRegionDumpPakIntegrate));
9369 
9370     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9371         &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
9372         0,
9373         m_resHuCPakAggregatedFrameStatsBuffer.dwSize,
9374         1,
9375         "",
9376         false,
9377         currentPass,
9378         hucRegionDumpPakIntegrate));
9379 
9380     PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9381     CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
9382 
9383     auto bitStreamSize = m_encodeParams.dwBitstreamSize -
9384         MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
9385 
9386     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9387         &m_resBitstreamBuffer,
9388         MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE),
9389         bitStreamSize,
9390         4,
9391         "",
9392         true,
9393         currentPass,
9394         hucRegionDumpPakIntegrate));
9395 
9396     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9397         &m_resBitstreamBuffer,
9398         MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE),
9399         bitStreamSize,
9400         5,
9401         "",
9402         false,
9403         currentPass,
9404         hucRegionDumpPakIntegrate));
9405 
9406     // Region 6 - BRC History buffer
9407     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9408         &m_vdencBrcHistoryBuffer,
9409         0,
9410         CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
9411         6,
9412         "",
9413         false,
9414         currentPass,
9415         hucRegionDumpPakIntegrate));
9416 
9417     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9418         &m_thirdLevelBatchBuffer.OsResource,
9419         0,
9420         m_thirdLBSize,
9421         7,
9422         "",
9423         true,
9424         currentPass,
9425         hucRegionDumpPakIntegrate));
9426 
9427     // Region 8
9428     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9429         &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass],
9430         0,
9431         MOS_ALIGN_CEIL(sizeof(HucCommandDataVdencG12), CODECHAL_PAGE_SIZE),
9432         8,
9433         "",
9434         true,
9435         currentPass,
9436         hucRegionDumpPakIntegrate));
9437 
9438     // Region 9 - HCP BRC Data Output
9439     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9440         &m_resBrcDataBuffer,
9441         0,
9442         CODECHAL_CACHELINE_SIZE,
9443         9,
9444         "",
9445         false,
9446         currentPass,
9447         hucRegionDumpPakIntegrate));
9448 
9449     // Region 10
9450     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9451         &m_HucStitchCmdBatchBuffer.OsResource,
9452         0,
9453         m_hwInterface->m_HucStitchCmdBatchBufferSize,
9454         10,
9455         "",
9456         false,
9457         currentPass,
9458         hucRegionDumpPakIntegrate));
9459 
9460     CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9461         &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
9462         0,
9463         m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
9464         15,
9465         "",
9466         true,
9467         currentPass,
9468         hucRegionDumpPakIntegrate));
9469 
9470     return MOS_STATUS_SUCCESS;
9471 }
9472 
DumpHucCqp()9473 MOS_STATUS CodechalVdencHevcStateG12::DumpHucCqp()
9474 {
9475     CODECHAL_ENCODE_FUNCTION_ENTER;
9476     int32_t currentPass = GetCurrentPass();
9477 
9478     // Region 5 - Output SLB Buffer
9479     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
9480         &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource,
9481         0,
9482         m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
9483         5,
9484         "_Out_Slb",
9485         false,
9486         currentPass,
9487         hucRegionDumpUpdate));
9488 
9489     return MOS_STATUS_SUCCESS;
9490 }
9491 #endif
9492 
SetRoundingValues()9493 MOS_STATUS CodechalVdencHevcStateG12::SetRoundingValues()
9494 {
9495     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9496 
9497     CODECHAL_ENCODE_FUNCTION_ENTER;
9498 
9499     if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingIntra)
9500     {
9501         m_roundIntraValue = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetIntra;
9502     }
9503     else
9504     {
9505         if (m_hevcPicParams->CodingType == I_TYPE)
9506         {
9507             m_roundIntraValue = 10;
9508         }
9509         else if (m_hevcSeqParams->HierarchicalFlag && m_hevcPicParams->HierarchLevelPlus1 > 0)
9510         {
9511             if (m_hevcPicParams->HierarchLevelPlus1 == 1)
9512             {
9513                 m_roundIntraValue = 10;
9514             }
9515             else if (m_hevcPicParams->HierarchLevelPlus1 == 2)
9516             {
9517                 m_roundIntraValue = 9;
9518             }
9519             else
9520             {
9521                 m_roundIntraValue = 8;
9522             }
9523         }
9524         else
9525         {
9526             m_roundIntraValue = 10;
9527         }
9528     }
9529 
9530     if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingInter)
9531     {
9532         m_roundInterValue = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetInter;
9533     }
9534     else
9535     {
9536         if (m_hevcPicParams->CodingType == I_TYPE)
9537         {
9538             m_roundInterValue = 4;
9539         }
9540         else if (m_hevcSeqParams->HierarchicalFlag && m_hevcPicParams->HierarchLevelPlus1 > 0)
9541         {
9542             if (m_hevcPicParams->HierarchLevelPlus1 == 1)
9543             {
9544                 m_roundInterValue = 4;
9545             }
9546             else if (m_hevcPicParams->HierarchLevelPlus1 == 2)
9547             {
9548                 m_roundInterValue = 3;
9549             }
9550             else
9551             {
9552                 m_roundInterValue = 2;
9553             }
9554         }
9555         else
9556         {
9557             m_roundInterValue = 4;
9558         }
9559     }
9560 
9561     return eStatus;
9562 }
9563 
SetAddCommands(uint32_t commandType,PMOS_COMMAND_BUFFER cmdBuffer,bool addToBatchBufferHuCBRC,uint32_t roundInterValue,uint32_t roundIntraValue,bool isLowDelayB,int8_t * pRefIdxMapping,int8_t recNotFilteredID)9564 MOS_STATUS CodechalVdencHevcStateG12::SetAddCommands(uint32_t commandType, PMOS_COMMAND_BUFFER cmdBuffer, bool addToBatchBufferHuCBRC, uint32_t roundInterValue, uint32_t roundIntraValue, bool isLowDelayB, int8_t * pRefIdxMapping, int8_t recNotFilteredID)
9565 {
9566 #ifdef _HEVC_ENCODE_VDENC_SUPPORTED
9567     void *pCmdParams = nullptr;
9568 
9569     if (commandType == CODECHAL_CMD1)
9570     {
9571         // Send CMD1 command
9572         MHW_VDBOX_VDENC_CMD1_PARAMS  cmd1Params;
9573         MOS_ZeroMemory(&cmd1Params, sizeof(cmd1Params));
9574         cmd1Params.Mode = CODECHAL_ENCODE_MODE_HEVC;
9575         cmd1Params.pHevcEncPicParams = m_hevcPicParams;
9576         cmd1Params.pHevcEncSlcParams = m_hevcSliceParams;
9577         cmd1Params.pInputParams      = pCmdParams;
9578         cmd1Params.bHevcVisualQualityImprovement = m_hevcVisualQualityImprovement;
9579         //down cast?
9580         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd1Cmd(cmdBuffer, nullptr, &cmd1Params));
9581     }
9582     else if (commandType == CODECHAL_CMD2)
9583     {
9584         PMHW_VDBOX_VDENC_CMD2_STATE cmd2Params(new MHW_VDBOX_VDENC_CMD2_STATE);
9585         CODECHAL_ENCODE_CHK_NULL_RETURN(cmd2Params);
9586 
9587         // set CMD2 command
9588         cmd2Params->Mode                    = CODECHAL_ENCODE_MODE_HEVC;
9589         cmd2Params->pHevcEncSeqParams       = m_hevcSeqParams;
9590         cmd2Params->pHevcEncPicParams       = m_hevcPicParams;
9591         cmd2Params->pHevcEncSlcParams       = m_hevcSliceParams;
9592         cmd2Params->bRoundingEnabled        = m_hevcVdencRoundingEnabled;
9593         cmd2Params->bPakOnlyMultipassEnable = m_pakOnlyPass;
9594         cmd2Params->bUseDefaultQpDeltas     = (m_hevcVdencAcqpEnabled && cmd2Params->pHevcEncSeqParams->QpAdjustment) ||
9595                                           (m_brcEnabled && cmd2Params->pHevcEncSeqParams->MBBRC != mbBrcDisabled);
9596         cmd2Params->bPanicEnabled                 = (m_brcEnabled) && (m_panicEnable) && (IsLastPass()) && !m_pakOnlyPass;
9597         cmd2Params->bStreamInEnabled              = m_vdencStreamInEnabled;
9598         cmd2Params->bROIStreamInEnabled           = m_vdencNativeROIEnabled;
9599         cmd2Params->bTileReplayEnable             = m_enableTileReplay;
9600         cmd2Params->bIsLowDelayB                  = isLowDelayB;
9601         cmd2Params->bCaptureModeEnable            = m_CaptureModeEnable;
9602         cmd2Params->m_WirelessSessionID           = 0;
9603         cmd2Params->pRefIdxMapping                = pRefIdxMapping;
9604         cmd2Params->recNotFilteredID              = recNotFilteredID;
9605         cmd2Params->pInputParams                  = pCmdParams;
9606         cmd2Params->ucNumRefIdxL0ActiveMinus1     = cmd2Params->pHevcEncSlcParams->num_ref_idx_l0_active_minus1;
9607         cmd2Params->bHevcVisualQualityImprovement = m_hevcVisualQualityImprovement;
9608         cmd2Params->roundInterValue               = roundInterValue;
9609         cmd2Params->roundIntraValue               = roundIntraValue;
9610         cmd2Params->bROIStreamInEnabled           = m_brcAdaptiveRegionBoostEnable ? true : cmd2Params->bROIStreamInEnabled;
9611         cmd2Params->bEnableSubPelMode             = m_encodeParams.bEnableSubPelMode;
9612         cmd2Params->SubPelMode                    = m_encodeParams.SubPelMode;
9613 
9614         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd2Cmd(cmdBuffer, nullptr, cmd2Params));
9615     }
9616 #endif
9617     return MOS_STATUS_SUCCESS;
9618 }
9619 
InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)9620 MOS_STATUS CodechalVdencHevcStateG12::InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)
9621 {
9622     MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS  miEnhancedConditionalBatchBufferEndParams;
9623 
9624     MOS_ZeroMemory(
9625         &miEnhancedConditionalBatchBufferEndParams,
9626         sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
9627 
9628     miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_resHucErrorStatusBuffer;
9629 
9630     miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
9631     miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = false;
9632     miEnhancedConditionalBatchBufferEndParams.compareOperation = MAD_EQUAL_IDD;
9633 
9634     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
9635         cmdBuffer,
9636         (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)));
9637 
9638     return MOS_STATUS_SUCCESS;
9639 }
9640 
9641