1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_vdenc_hevc_g12.cpp
24 //! \brief HEVC VDEnc encoder for GEN12.
25 //!
26
27 #include "codechal_vdenc_hevc_g12.h"
28 #include "codechal_kernel_header_g12.h"
29 #include "codeckrnheader.h"
30 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
31 #include "igcodeckrn_g12.h"
32 #endif
33 #include "mhw_vdbox_g12_X.h"
34 #include "mhw_vdbox_hcp_g12_X.h"
35 #include "mhw_vdbox_vdenc_g12_X.h"
36 #include "mhw_mi_g12_X.h"
37 #include "mhw_render_g12_X.h"
38 #include "codechal_mmc_encode_hevc_g12.h"
39 #include "mhw_mmio_g12.h"
40 #include "hal_oca_interface.h"
41 #ifdef _ENCODE_VDENC_RESERVED
42 #include "codechal_debug_encode_brc.h"
43 #endif
44 const uint32_t CodechalVdencHevcStateG12::m_VdboxVDENCRegBase[4] = M_VDBOX_VDENC_REG_BASE;
45
46 const double CodechalVdencHevcStateG12::m_devThreshIFPNEG[] = {
47 0.80, 0.60, 0.34, 0.2,
48 };
49
50 const double CodechalVdencHevcStateG12::m_devThreshIFPPOS[] = {
51 0.2, 0.4 , 0.66, 0.9,
52 };
53
54 const double CodechalVdencHevcStateG12::m_devThreshPBFPNEG[] = {
55 0.90, 0.66, 0.46, 0.3,
56 };
57
58 const double CodechalVdencHevcStateG12::m_devThreshPBFPPOS[] = {
59 0.3, 0.46, 0.70, 0.90,
60 };
61
62 const double CodechalVdencHevcStateG12::m_devThreshVBRNEG[] = {
63 0.90, 0.70, 0.50, 0.3,
64 };
65
66 const double CodechalVdencHevcStateG12::m_devThreshVBRPOS[] = {
67 0.4, 0.5, 0.75, 0.90,
68 };
69
70 const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshPB[] = {
71 -45, -33, -23, -15, -8, 0, 15, 25,
72 };
73 const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshVBR[] = {
74 -45, -35, -25, -15, -8, 0, 20, 40,
75 };
76 const int8_t CodechalVdencHevcStateG12::m_lowdelayDevThreshI[] = {
77 -40, -30, -17, -10, -5, 0, 10, 20,
78 };
79
80 const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszI[][8] = {
81 { 0, 0, -8, -12, -16, -20, -28, -36 },
82 { 0, 0, -4, -8, -12, -16, -24, -32 },
83 { 4, 2, 0, -1, -3, -8, -16, -24 },
84 { 8, 4, 2, 0, -1, -4, -8, -16 },
85 { 20, 16, 4, 0, -1, -4, -8, -16 },
86 { 24, 20, 16, 8, 4, 0, -4, -8 },
87 { 28, 24, 20, 16, 8, 4, 0, -8 },
88 { 32, 24, 20, 16, 8, 4, 0, -4 },
89 { 64, 48, 28, 20, 16, 12, 8, 4 },
90 };
91
92 const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszP[][8] = {
93 { -8, -24, -32, -40, -44, -48, -52, -80 },
94 { -8, -16, -32, -40, -40, -44, -44, -56 },
95 { 0, 0, -12, -20, -24, -28, -32, -36 },
96 { 8, 4, 0, 0, -8, -16, -24, -32 },
97 { 32, 16, 8, 4, -4, -8, -16, -20 },
98 { 36, 24, 16, 8, 4, -2, -4, -8 },
99 { 40, 36, 24, 20, 16, 8, 0, -8 },
100 { 48, 40, 28, 24, 20, 12, 0, -4 },
101 { 64, 48, 28, 20, 16, 12, 8, 4 },
102 };
103
104 const int8_t CodechalVdencHevcStateG12::m_lowdelayDeltaFrmszB[][8] = {
105 { 0, -4, -8, -16, -24, -32, -40, -48 },
106 { 1, 0, -4, -8, -16, -24, -32, -40 },
107 { 4, 2, 0, -1, -3, -8, -16, -24 },
108 { 8, 4, 2, 0, -1, -4, -8, -16 },
109 { 20, 16, 4, 0, -1, -4, -8, -16 },
110 { 24, 20, 16, 8, 4, 0, -4, -8 },
111 { 28, 24, 20, 16, 8, 4, 0, -8 },
112 { 32, 24, 20, 16, 8, 4, 0, -4 },
113 { 64, 48, 28, 20, 16, 12, 8, 4 },
114 };
115
//! Definition of the static m_hucConstantData table: a flat array of 32-bit words.
//! NOTE(review): the name suggests this is constant data handed to the HuC; the
//! consumer and the meaning of the individual words are not visible in this part
//! of the file, so the values are reproduced untouched.
116 const uint32_t CodechalVdencHevcStateG12::m_hucConstantData[] = {
117 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c, 0x012c012c, 0x012c012c,
118 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00640064,
119 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
120 0x00640064, 0x00640064, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x01900190, 0x012c012c,
121 0x012c012c, 0x012c012c, 0x012c012c, 0x012c012c, 0x00c800c8, 0x00c800c8, 0x00c800c8, 0x00c800c8,
122 0x00c800c8, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x00640064,
123 0x00640064, 0x00640064, 0x00640064, 0x00640064, 0x503c1e04, 0xffc88c78, 0x3c1e0400, 0xc88c7850,
124 0x140200ff, 0xa0824628, 0x0000ffc8, 0x00000000, 0x04030302, 0x00000000, 0x03030200, 0x0000ff04,
125 0x02020000, 0xffff0303, 0x01000000, 0xff020202, 0x0000ffff, 0x02020100, 0x00fffffe, 0x01010000,
126 0xfffffe02, 0x010000ff, 0xfefe0201, 0x0000ffff, 0xfe010100, 0x00fffffe, 0x01010000, 0x00000000,
127 0x03030200, 0x00000004, 0x03020000, 0x00ff0403, 0x02000000, 0xff030302, 0x000000ff, 0x02020201,
128 0x00ffffff, 0x02010000, 0xfffffe02, 0x01000000, 0xfffe0201, 0x0000ffff, 0xfe020101, 0x00fffffe,
129 0x01010000, 0xfffffefe, 0x01000000, 0x00000001, 0x03020000, 0x00000403, 0x02000000, 0xff040303,
130 0x00000000, 0x03030202, 0x0000ffff, 0x02020100, 0xffffff02, 0x01000000, 0xfffe0202, 0x000000ff,
131 0xfe020101, 0x00ffffff, 0x02010100, 0xfffffefe, 0x01000000, 0xfffefe01, 0x000000ff, 0xe0e00101,
132 0xc0d0d0d0, 0xe0e0b0c0, 0xd0d0d0e0, 0xf0f0c0d0, 0xd0e0e0e0, 0x0408d0d0, 0xe8f0f800, 0x1820dce0,
133 0xf8fc0210, 0x2024ecf0, 0x0008101c, 0x2428f8fc, 0x08101418, 0x2830f800, 0x0c14181c, 0x3040fc00,
134 0x0c10141c, 0xe8f80408, 0xc8d0d4e0, 0xf0f8b0c0, 0xccd4d8e0, 0x0000c0c8, 0xd8dce4f0, 0x0408d0d4,
135 0xf0f80000, 0x0808dce8, 0xf0f80004, 0x0810dce8, 0x00080808, 0x0810f8fc, 0x08080808, 0x1010f800,
136 0x08080808, 0x1020fc00, 0x08080810, 0xfc000408, 0xe0e8f0f8, 0x0001d0d8, 0xe8f0f8fc, 0x0204d8e0,
137 0xf8fdff00, 0x0408e8f0, 0xfcff0002, 0x1014f0f8, 0xfcff0004, 0x1418f0f8, 0x00040810, 0x181cf8fc,
138 0x04081014, 0x1820f800, 0x04081014, 0x3040fc00, 0x0c10141c, 0x40300408, 0x80706050, 0x30a0a090,
139 0x70605040, 0xa0a09080, 0x60504030, 0xa0908070, 0x040201a0, 0x18141008, 0x02012420, 0x0a080604,
140 0x01101010, 0x0c080402, 0x10101010, 0x05030201, 0x02010106, 0x00000503, 0xff030201, 0x02010000,
141 0x000000ff, 0xfffefe01, 0xfdfd0100, 0xfb00ffff, 0xfffffefd, 0xfefdfbfa, 0x030201ff, 0x01010605,
142 0x00050302, 0x03020101, 0x010000ff, 0x0000ff02, 0xffff0100, 0xfe0100ff, 0x00ffffff, 0xfffffefc,
143 0xfefcfb00, 0x0101ffff, 0x01050402, 0x04020101, 0x01010000, 0x0000ff02, 0x00ff0101, 0xff000000,
144 0x0100ffff, 0xfffffffe, 0xfffefd00, 0xfcfb00ff, 0x1efffffe, 0x070d0e10, 0x00003207, 0x00000000,
145 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
146 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
147 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
148 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
149 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
150 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
151 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
152 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
153 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
154 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
155 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
156 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
157 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
158 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
159 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
160 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
161 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
162 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
163 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
164 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
165 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
166 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
167 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
168 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
169 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
170 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
171 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
172 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
173 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
174 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
175 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
176 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
177 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
178 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
179 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
180 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
181 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
182 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
183 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000,
184 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10,
185 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207,
186 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000,
187 0x00000000, 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000,
188 0x00000000, 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000,
189 0x00000000, 0x1e000000, 0x070d0e10, 0x00003207, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
190 0x00000000, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
191 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
192 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
193 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
194 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
195 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
196 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
197 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
198 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
199 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
200 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
201 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
202 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
203 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
204 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
205 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
206 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
207 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
208 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
209 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
210 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
211 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
212 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
213 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
214 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
215 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
216 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
217 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
218 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
219 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
220 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000,
221 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff,
222 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff,
223 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000,
224 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff,
225 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff, 0x0000ffff,
226 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff, 0xffffffff,
227 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff, 0xffffffff,
228 0xffffffff, 0x0000ffff, 0xffffffff, 0xffff0000, 0xffffffff, 0xffffffff, 0xffff0000, 0x0000ffff,
229 0xffffffff, 0xffffffff, 0x0000ffff, 0xffffffff
230 };
231
GetMaxBtCount()232 uint32_t CodechalVdencHevcStateG12::GetMaxBtCount()
233 {
234 CODECHAL_ENCODE_FUNCTION_ENTER;
235
236 uint32_t maxBtCount = 0;
237
238 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
239 auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
240
241 // DsConversion kernel
242 maxBtCount = (m_32xMeSupported ? 3 : 2) * (MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment));
243
244 // add ME and stream-in kernel
245 if(m_b16XMeEnabled)
246 {
247 MHW_KERNEL_STATE kernelState = m_lowDelay ? m_vdencMeKernelState : m_vdencMeKernelStateRAB;
248 if(m_b32XMeEnabled)
249 {
250 maxBtCount += MOS_ALIGN_CEIL(kernelState.KernelParams.iBTCount, btIdxAlignment);
251 }
252 maxBtCount += MOS_ALIGN_CEIL(kernelState.KernelParams.iBTCount, btIdxAlignment);
253
254 kernelState = m_lowDelay ? m_vdencStreaminKernelState : m_vdencStreaminKernelStateRAB;
255 maxBtCount += MOS_ALIGN_CEIL(kernelState.KernelParams.iBTCount, btIdxAlignment);
256 }
257 #endif
258
259 return maxBtCount;
260 }
261
InitKernelStateMe()262 MOS_STATUS CodechalVdencHevcStateG12::InitKernelStateMe()
263 {
264 CODECHAL_ENCODE_FUNCTION_ENTER;
265
266 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
267
268 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
269 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
270
271 uint32_t kernelSize = m_combinedKernelSize;
272 CODECHAL_KERNEL_HEADER currKrnHeader;
273 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
274 m_kernelBinary,
275 VDENC_ME_P,
276 0,
277 &currKrnHeader,
278 &kernelSize));
279
280 auto kernelStatePtr = &m_vdencMeKernelState;
281 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
282 VDENC_ME_P,
283 &kernelStatePtr->KernelParams));
284
285 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
286 VDENC_ME_P,
287 &m_vdencMeKernelBindingTable));
288
289 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
290 kernelStatePtr->KernelParams.pBinary =
291 m_kernelBinary +
292 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
293 kernelStatePtr->KernelParams.iSize = kernelSize;
294
295 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
296 m_stateHeapInterface,
297 kernelStatePtr->KernelParams.iBTCount,
298 &kernelStatePtr->dwSshSize,
299 &kernelStatePtr->dwBindingTableSize));
300
301 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
302
303 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
304 m_kernelBinary,
305 VDENC_ME_B,
306 0,
307 &currKrnHeader,
308 &kernelSize));
309
310 kernelStatePtr = &m_vdencMeKernelStateRAB;
311 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
312 VDENC_ME_B,
313 &kernelStatePtr->KernelParams));
314
315 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
316 VDENC_ME_B,
317 &m_vdencStreaminKernelBindingTable));
318
319 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
320 kernelStatePtr->KernelParams.pBinary =
321 m_kernelBinary +
322 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
323 kernelStatePtr->KernelParams.iSize = kernelSize;
324
325 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
326 m_stateHeapInterface,
327 kernelStatePtr->KernelParams.iBTCount,
328 &kernelStatePtr->dwSshSize,
329 &kernelStatePtr->dwBindingTableSize));
330
331 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
332
333 return eStatus;
334 }
335
InitKernelStateStreamIn()336 MOS_STATUS CodechalVdencHevcStateG12::InitKernelStateStreamIn()
337 {
338 CODECHAL_ENCODE_FUNCTION_ENTER;
339 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
340
341 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface);
342 CODECHAL_ENCODE_CHK_NULL_RETURN(m_stateHeapInterface->pStateHeapInterface);
343
344 uint32_t kernelSize = m_combinedKernelSize;
345 CODECHAL_KERNEL_HEADER currKrnHeader;
346 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
347 m_kernelBinary,
348 VDENC_STREAMIN_HEVC,
349 0,
350 &currKrnHeader,
351 &kernelSize));
352
353 auto kernelStatePtr = &m_vdencStreaminKernelState;
354 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
355 VDENC_STREAMIN_HEVC,
356 &kernelStatePtr->KernelParams));
357
358 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
359 VDENC_STREAMIN_HEVC,
360 &m_vdencStreaminKernelBindingTable));
361
362 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
363 kernelStatePtr->KernelParams.pBinary =
364 m_kernelBinary +
365 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
366 kernelStatePtr->KernelParams.iSize = kernelSize;
367
368 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
369 m_stateHeapInterface,
370 kernelStatePtr->KernelParams.iBTCount,
371 &kernelStatePtr->dwSshSize,
372 &kernelStatePtr->dwBindingTableSize));
373
374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
375
376 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
377 m_kernelBinary,
378 VDENC_STREAMIN_HEVC_RAB,
379 0,
380 &currKrnHeader,
381 &kernelSize));
382
383 kernelStatePtr = &m_vdencStreaminKernelStateRAB;
384 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
385 VDENC_STREAMIN_HEVC_RAB,
386 &kernelStatePtr->KernelParams));
387
388 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
389 VDENC_STREAMIN_HEVC_RAB,
390 &m_vdencStreaminKernelBindingTable));
391
392 kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
393 kernelStatePtr->KernelParams.pBinary =
394 m_kernelBinary +
395 (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
396 kernelStatePtr->KernelParams.iSize = kernelSize;
397
398 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
399 m_stateHeapInterface,
400 kernelStatePtr->KernelParams.iBTCount,
401 &kernelStatePtr->dwSshSize,
402 &kernelStatePtr->dwBindingTableSize));
403
404 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
405
406 return eStatus;
407 }
408
InitKernelState()409 MOS_STATUS CodechalVdencHevcStateG12::InitKernelState()
410 {
411 CODECHAL_ENCODE_FUNCTION_ENTER;
412
413 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
414
415 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
416 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
417 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateStreamIn());
418 #endif
419
420 return eStatus;
421 }
422
DecideEncodingPipeNumber()423 MOS_STATUS CodechalVdencHevcStateG12::DecideEncodingPipeNumber()
424 {
425 CODECHAL_ENCODE_FUNCTION_ENTER;
426
427 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
428
429 m_numPipePre = m_numPipe;
430 m_numPipe = m_numVdbox;
431
432 uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
433
434 CODECHAL_ENCODE_VERBOSEMESSAGE("Tile Columns = %d.", numTileColumns);
435
436 if (numTileColumns > m_numPipe)
437 {
438 // Streaming buffer does does work if numTileColumns > m_numPipe
439 if (m_hevcSeqParams->EnableStreamingBufferLLC || m_hevcSeqParams->EnableStreamingBufferDDR)
440 {
441 CODECHAL_ENCODE_ASSERTMESSAGE("Streaming buffer does does work if numTileColumns > m_numPipe!");
442 return MOS_STATUS_INVALID_PARAMETER;
443 }
444 m_numPipe = 1;
445 }
446
447 if (numTileColumns < m_numPipe)
448 {
449 if (numTileColumns >= 1 && numTileColumns <= 4)
450 {
451 m_numPipe = numTileColumns;
452 }
453 else
454 {
455 m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
456 }
457 }
458
459 // Tile replay needs scalability enabled, Remove Resolution check for scalability
460
461 m_useVirtualEngine = true; // always use virtual engine interface for single pipe and scalability mode
462
463 m_numUsedVdbox = m_numPipe;
464 m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
465
466 if (m_scalabilityState)
467 {
468 // Create/ re-use a GPU context with 2 pipes
469 m_scalabilityState->ucScalablePipeNum = m_numPipe;
470 }
471
472 CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d, decided pipe num = %d.", m_numVdbox, m_numPipe);
473
474 return eStatus;
475 }
476
CheckSupportedFormat(PMOS_SURFACE surface)477 bool CodechalVdencHevcStateG12::CheckSupportedFormat(PMOS_SURFACE surface)
478 {
479 CODECHAL_ENCODE_FUNCTION_ENTER;
480
481 bool isColorFormatSupported = false;
482
483 if (nullptr == surface)
484 {
485 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
486 return isColorFormatSupported;
487 }
488
489 switch (surface->Format)
490 {
491 case Format_NV12:
492 case Format_NV21:
493 case Format_P010: // Planar 4:2:0
494 case Format_YUY2:
495 case Format_YUYV:
496 case Format_YVYU:
497 case Format_UYVY:
498 case Format_VYUY:
499 case Format_A8R8G8B8:
500 case Format_A8B8G8R8:
501 case Format_R10G10B10A2:// Packed RGB 4:4:4
502 case Format_B10G10R10A2:// Packed RGB 4:4:4
503 case Format_AYUV:
504 case Format_Y410: // Packed 4:4:4
505 isColorFormatSupported = true;
506 break;
507 case Format_Y210: // Packed 4:2:2
508 if (MEDIA_IS_WA(m_waTable, WaHEVCVDEncY210LinearInputNotSupported))
509 {
510 isColorFormatSupported = surface->TileType == MOS_TILE_Y;
511 }
512 else
513 {
514 isColorFormatSupported = true;
515 }
516 break;
517 default:
518 CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
519 break;
520 }
521
522 return isColorFormatSupported;
523 }
524
PlatformCapabilityCheck()525 MOS_STATUS CodechalVdencHevcStateG12::PlatformCapabilityCheck()
526 {
527 CODECHAL_ENCODE_FUNCTION_ENTER;
528
529 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
530
531 CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
532
533 if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
534 {
535 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
536 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
537 }
538
539 if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_16K_PIC_WIDTH * ENCODE_HEVC_MAX_16K_PIC_HEIGHT)
540 {
541 eStatus = MOS_STATUS_INVALID_PARAMETER;
542 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 16k not supported");
543 }
544
545 if (m_hevcSeqParams->SliceSizeControl && m_frameWidth * m_frameHeight < ENCODE_HEVC_MIN_DSS_PIC_WIDTH * ENCODE_HEVC_MIN_DSS_PIC_HEIGHT)
546 {
547 eStatus = MOS_STATUS_INVALID_PARAMETER;
548 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "DSS is not supported when frame resolution less than 320p");
549 }
550
551 if (m_hevcSeqParams->ParallelBRC)
552 {
553 eStatus = MOS_STATUS_INVALID_PARAMETER;
554 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Parallel BRC is not supported on VDENC");
555 }
556
557 if (m_hevcSeqParams->bit_depth_luma_minus8 >= 4 || m_hevcSeqParams->bit_depth_chroma_minus8 >= 4)
558 {
559 eStatus = MOS_STATUS_INVALID_PARAMETER;
560 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "12bit encoding is not supported on VDENC");
561 }
562
563 if (m_hevcSeqParams->chroma_format_idc == 2)
564 {
565 eStatus = MOS_STATUS_INVALID_PARAMETER;
566 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "422 recon format encoding is not supported on HEVC VDENC");
567 }
568
569 // TU configuration for RDOQ
570 if (m_hevcRdoqEnabled)
571 {
572 m_hevcRdoqEnabled = (m_hevcSeqParams->TargetUsage < 7);
573 }
574
575 // set RDOQ Intra blocks Threshold for Gen11+
576 m_rdoqIntraTuThreshold = 0;
577 if (m_hevcRdoqEnabled)
578 {
579 if (1 == m_hevcSeqParams->TargetUsage)
580 {
581 m_rdoqIntraTuThreshold = 0xffff;
582 }
583 else if (4 == m_hevcSeqParams->TargetUsage)
584 {
585 m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb;
586 m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff);
587 }
588 }
589
590 if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
591 {
592 if (m_hevcPicParams->tiles_enabled_flag)
593 {
594 for (auto i = 0; i < m_hevcPicParams->num_tile_columns_minus1 + 1; i++)
595 {
596 if (m_hevcPicParams->tile_column_width[i] < 5)
597 {
598 CODECHAL_ENCODE_ASSERTMESSAGE("SCC IBC mode can't support tile width < 5 LCU");
599 return MOS_STATUS_PLATFORM_NOT_SUPPORTED;
600 }
601 }
602 }
603 else
604 {
605 if (MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) < 5)
606 {
607 CODECHAL_ENCODE_ASSERTMESSAGE("in tiling disabled case, SCC IBC mode can't support picture width < 5 LCU");
608 return MOS_STATUS_PLATFORM_NOT_SUPPORTED;
609 }
610 }
611 }
612
613 return eStatus;
614 }
615
~CodechalVdencHevcStateG12()616 CodechalVdencHevcStateG12::~CodechalVdencHevcStateG12()
617 {
618 CODECHAL_ENCODE_FUNCTION_ENTER;
619
620 if (m_scalabilityState)
621 {
622 MOS_FreeMemAndSetNull(m_scalabilityState);
623 }
624 //Note: virtual engine interface destroy is done in MOS layer
625
626 CODECHAL_DEBUG_TOOL(
627 MOS_Delete(m_encodeParState);
628 )
629 #ifdef _ENCODE_VDENC_RESERVED
630 if (m_rsvdState)
631 {
632 MOS_Delete(m_rsvdState);
633 m_rsvdState = nullptr;
634 }
635 #endif
636 if(m_gpuCtxCreatOpt)
637 {
638 MOS_Delete(m_gpuCtxCreatOpt);
639 m_gpuCtxCreatOpt = nullptr;
640 }
641 return;
642 }
643
AllocatePakResources()644 MOS_STATUS CodechalVdencHevcStateG12::AllocatePakResources()
645 {
646 CODECHAL_ENCODE_FUNCTION_ENTER;
647
648 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
649
650 uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
651 uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
652 m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
653
654 const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max width
655 const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_LCU_SIZE); //assume smallest LCU to get max height
656
657 MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam;
658 MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam));
659 hcpBufSizeParam.ucMaxBitDepth = m_bitDepth;
660 hcpBufSizeParam.ucChromaFormat = m_chromaFormat;
661 // We should move the buffer allocation to picture level if the size is dependent on LCU size
662 hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size
663 hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
664 hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
665
666 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
667 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
668 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
669 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
670 allocParamsForBufferLinear.Format = Format_Buffer;
671
672 // Deblocking Filter Row Store Scratch data surface
673 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
674 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE,
675 &hcpBufSizeParam);
676
677 if (eStatus != MOS_STATUS_SUCCESS)
678 {
679 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer.");
680 return eStatus;
681 }
682
683 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
684 allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer";
685
686 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
687 m_osInterface,
688 &allocParamsForBufferLinear,
689 &m_resDeblockingFilterRowStoreScratchBuffer);
690
691 if (eStatus != MOS_STATUS_SUCCESS)
692 {
693 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer.");
694 return eStatus;
695 }
696
697 // Deblocking Filter Tile Row Store Scratch data surface
698 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
699 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE,
700 &hcpBufSizeParam);
701
702 if (eStatus != MOS_STATUS_SUCCESS)
703 {
704 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer.");
705 return eStatus;
706 }
707
708 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
709 allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer";
710
711 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
712 m_osInterface,
713 &allocParamsForBufferLinear,
714 &m_resDeblockingFilterTileRowStoreScratchBuffer);
715
716 if (eStatus != MOS_STATUS_SUCCESS)
717 {
718 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer.");
719 return eStatus;
720 }
721
722 // Deblocking Filter Column Row Store Scratch data surface
723 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
724 MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL,
725 &hcpBufSizeParam);
726
727 if (eStatus != MOS_STATUS_SUCCESS)
728 {
729 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer.");
730 return eStatus;
731 }
732
733 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
734 allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer";
735
736 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
737 m_osInterface,
738 &allocParamsForBufferLinear,
739 &m_resDeblockingFilterColumnRowStoreScratchBuffer);
740
741 if (eStatus != MOS_STATUS_SUCCESS)
742 {
743 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer.");
744 return eStatus;
745 }
746
747 // Metadata Line buffer
748 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
749 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE,
750 &hcpBufSizeParam);
751
752 if (eStatus != MOS_STATUS_SUCCESS)
753 {
754 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer.");
755 return eStatus;
756 }
757
758 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
759 allocParamsForBufferLinear.pBufName = "MetadataLineBuffer";
760
761 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
762 m_osInterface,
763 &allocParamsForBufferLinear,
764 &m_resMetadataLineBuffer);
765
766 if (eStatus != MOS_STATUS_SUCCESS)
767 {
768 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer.");
769 return eStatus;
770 }
771
772 // Metadata Tile Line buffer
773 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
774 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE,
775 &hcpBufSizeParam);
776
777 if (eStatus != MOS_STATUS_SUCCESS)
778 {
779 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer.");
780 return eStatus;
781 }
782
783 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
784 allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer";
785
786 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
787 m_osInterface,
788 &allocParamsForBufferLinear,
789 &m_resMetadataTileLineBuffer);
790
791 if (eStatus != MOS_STATUS_SUCCESS)
792 {
793 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer.");
794 return eStatus;
795 }
796
797 // Metadata Tile Column buffer
798 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
799 MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL,
800 &hcpBufSizeParam);
801
802 if (eStatus != MOS_STATUS_SUCCESS)
803 {
804 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer.");
805 return eStatus;
806 }
807
808 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
809 allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer";
810
811 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
812 m_osInterface,
813 &allocParamsForBufferLinear,
814 &m_resMetadataTileColumnBuffer);
815
816 if (eStatus != MOS_STATUS_SUCCESS)
817 {
818 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer.");
819 return eStatus;
820 }
821
822 // SAO Line buffer
823 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
824 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE,
825 &hcpBufSizeParam);
826
827 if (eStatus != MOS_STATUS_SUCCESS)
828 {
829 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer.");
830 return eStatus;
831 }
832
833 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
834 allocParamsForBufferLinear.pBufName = "SaoLineBuffer";
835
836 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
837 m_osInterface,
838 &allocParamsForBufferLinear,
839 &m_resSaoLineBuffer);
840
841 if (eStatus != MOS_STATUS_SUCCESS)
842 {
843 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer.");
844 return eStatus;
845 }
846
847 // SAO Tile Line buffer
848 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
849 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE,
850 &hcpBufSizeParam);
851
852 if (eStatus != MOS_STATUS_SUCCESS)
853 {
854 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer.");
855 return eStatus;
856 }
857
858 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
859 allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer";
860
861 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
862 m_osInterface,
863 &allocParamsForBufferLinear,
864 &m_resSaoTileLineBuffer);
865
866 if (eStatus != MOS_STATUS_SUCCESS)
867 {
868 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer.");
869 return eStatus;
870 }
871
872 // SAO Tile Column buffer
873 eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize(
874 MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL,
875 &hcpBufSizeParam);
876
877 if (eStatus != MOS_STATUS_SUCCESS)
878 {
879 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer.");
880 return eStatus;
881 }
882
883 allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize;
884 allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer";
885
886 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
887 m_osInterface,
888 &allocParamsForBufferLinear,
889 &m_resSaoTileColumnBuffer);
890
891 if (eStatus != MOS_STATUS_SUCCESS)
892 {
893 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer.");
894 return eStatus;
895 }
896
897 // Lcu ILDB StreamOut buffer
898 allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
899 allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
900
901 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
902 m_osInterface,
903 &allocParamsForBufferLinear,
904 &m_resLcuIldbStreamOutBuffer);
905
906 if (eStatus != MOS_STATUS_SUCCESS)
907 {
908 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer.");
909 return eStatus;
910 }
911
912 // Lcu Base Address buffer
913 // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled.
914 // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame.
915 // Align to page for HUC requirement
916 uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU;
917 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
918 allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer";
919
920 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
921 m_osInterface,
922 &allocParamsForBufferLinear,
923 &m_resLcuBaseAddressBuffer);
924
925 if (eStatus != MOS_STATUS_SUCCESS)
926 {
927 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer.");
928 return eStatus;
929 }
930
931 // SAO Row Store buffer
932 // Aligned to 4 for each tile column
933 uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
934 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(picWidthInMinLCU + 3 * maxTileColumn, 4) * 16;
935 allocParamsForBufferLinear.pBufName = "SaoRowStoreBuffer";
936
937 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
938 m_osInterface,
939 &allocParamsForBufferLinear,
940 &m_vdencSAORowStoreBuffer);
941
942 if (eStatus != MOS_STATUS_SUCCESS)
943 {
944 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO row store Buffer.");
945 return eStatus;
946 }
947
948 // SAO StreamOut buffer
949 uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
950 //extra added size to cover tile enabled case, per tile width aligned to 4. 20: max tile column No.
951 size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU;
952 allocParamsForBufferLinear.dwBytes = size;
953 allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer";
954
955 eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
956 m_osInterface,
957 &allocParamsForBufferLinear,
958 &m_resSaoStreamOutBuffer);
959
960 if (eStatus != MOS_STATUS_SUCCESS)
961 {
962 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer.");
963 return eStatus;
964 }
965
966 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
967 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
968 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
969 allocParamsForBufferLinear.Format = Format_Buffer;
970
971 // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
972 size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE); //Each tile has 9 cache size bytes of data, Align to page is HuC requirement
973 allocParamsForBufferLinear.dwBytes = size;
974 allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
975
976 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
977 m_osInterface,
978 &allocParamsForBufferLinear,
979 &m_resFrameStatStreamOutBuffer),
980 "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
981
982 // PAK Statistics buffer
983 size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
984 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
985 m_standard, size, 1, pakStats, "pakStats"));
986
987 // Slice Count buffer 1 DW = 4 Bytes
988 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
989 allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
990
991 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
992 m_osInterface,
993 &allocParamsForBufferLinear,
994 &m_sliceCountBuffer),
995 "Failed to create VDENC Slice Count Buffer");
996
997 // VDEncMode Timer buffer 1 DW = 4 Bytes
998 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
999 allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
1000
1001 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1002 m_osInterface,
1003 &allocParamsForBufferLinear,
1004 &m_vdencModeTimerBuffer),
1005 "Failed to create VDEncMode Timer Buffer");
1006
1007 uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
1008 uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
1009 uint32_t frameWidthInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
1010 uint32_t frameHeightInLcus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MAX_LCU_SIZE_G10);
1011 uint32_t maxTileColumns = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
1012
1013 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
1014 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1015 size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
1016 allocParamsForBufferLinear.dwBytes = size;
1017 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
1018
1019 CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
1020 m_osInterface,
1021 &allocParamsForBufferLinear,
1022 &m_resPakcuLevelStreamoutData.sResource));
1023 m_resPakcuLevelStreamoutData.dwSize = size;
1024 CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size);
1025
1026 // these 2 buffers are not used so far, but put the correct size calculation here
1027 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
1028 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
1029 // size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
1030
1031 // PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
1032 // one LCU has one cache line. Use CU as LCU during creation
1033 // size = frameWidthInLcus * frameHeightInLcus * CODECHAL_CACHELINE_SIZE;
1034
1035 // Allocate SSE Source Pixel Row Store Buffer
1036 m_sizeOfSseSrcPixelRowStoreBufferPerLcu = CODECHAL_CACHELINE_SIZE * (4 + 4) << 1;
1037 allocParamsForBufferLinear.dwBytes = 2 * m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_widthAlignedMaxLcu + 3 * maxTileColumns);
1038 allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
1039
1040 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1041 m_osInterface,
1042 &allocParamsForBufferLinear,
1043 &m_resSseSrcPixelRowStoreBuffer),
1044 "Failed to create SseSrcPixelRowStoreBuffer");
1045
1046 //HCP scalability Sync buffer
1047 allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
1048 allocParamsForBufferLinear.pBufName = "GEN11 HCP scalability Sync buffer ";
1049
1050 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1051 m_osInterface,
1052 &allocParamsForBufferLinear,
1053 &m_resHcpScalabilitySyncBuffer.sResource),
1054 "Failed to create GEN11 HCP scalability Sync Buffer");
1055
1056 // create the tile coding state parameters
1057 for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1058 {
1059 m_tileParams[i] = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(
1060 sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)* m_maxTileNumber);
1061 }
1062
1063 if (m_enableHWSemaphore)
1064 {
1065 // Create the HW sync objects which will be used by each reference frame and BRC in GEN11
1066 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1067 allocParamsForBufferLinear.pBufName = "SemaphoreMemory";
1068
1069 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1070 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1071 lockFlagsWriteOnly.WriteOnly = 1;
1072
1073 uint32_t* data = nullptr;
1074
1075 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1076 {
1077 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1078 m_osInterface,
1079 &allocParamsForBufferLinear,
1080 &m_refSync[i].resSemaphoreMem.sResource),
1081 "Failed to create HW Semaphore Memory.");
1082 m_refSync[i].resSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
1083
1084 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1085 m_osInterface,
1086 &m_refSync[i].resSemaphoreMem.sResource,
1087 &lockFlagsWriteOnly));
1088
1089 *data = 1;
1090
1091 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1092 m_osInterface,
1093 &m_refSync[i].resSemaphoreMem.sResource));
1094 }
1095 }
1096
1097 // create the HW semaphore buffer to sync up between VDBOXes. This is used to WA HW internal lock issue
1098 if (m_enableVdBoxHWSemaphore)
1099 {
1100 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1101 allocParamsForBufferLinear.pBufName = "VDBOX SemaphoreMemory";
1102
1103 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1104 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1105 lockFlagsWriteOnly.WriteOnly = 1;
1106
1107 uint32_t* data = nullptr;
1108
1109 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1110 {
1111 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1112 m_osInterface,
1113 &allocParamsForBufferLinear,
1114 &m_resVdBoxSemaphoreMem[i].sResource),
1115 "Failed to create VDBOX HW Semaphore Memory.");
1116
1117 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1118 m_osInterface,
1119 &m_resVdBoxSemaphoreMem[i].sResource,
1120 &lockFlagsWriteOnly));
1121
1122 *data = 0;
1123
1124 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1125 m_osInterface,
1126 &m_resVdBoxSemaphoreMem[i].sResource));
1127 }
1128
1129 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resPipeStartSemaMem); i++)
1130 {
1131 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1132 m_osInterface,
1133 &allocParamsForBufferLinear,
1134 &m_resPipeStartSemaMem[i].sResource),
1135 "Failed to create VDBOX HW Semaphore Memory.");
1136
1137 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1138 m_osInterface,
1139 &m_resPipeStartSemaMem[i].sResource,
1140 &lockFlagsWriteOnly));
1141
1142 *data = 0;
1143
1144 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1145 m_osInterface,
1146 &m_resPipeStartSemaMem[i].sResource));
1147 }
1148 }
1149
1150 uint32_t* data = nullptr;
1151 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1152 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1153 lockFlagsWriteOnly.WriteOnly = 1;
1154
1155 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1156 allocParamsForBufferLinear.pBufName = "BrcPakSemaphoreMemory";
1157
1158 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1159 m_osInterface,
1160 &allocParamsForBufferLinear,
1161 &m_resBrcPakSemaphoreMem.sResource),
1162 "Failed to create BRC PAK Semaphore Memory.");
1163
1164 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1165 m_osInterface,
1166 &m_resBrcPakSemaphoreMem.sResource,
1167 &lockFlagsWriteOnly));
1168
1169 *data = 0;
1170
1171 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1172 m_osInterface,
1173 &m_resBrcPakSemaphoreMem.sResource));
1174
1175 // 3rd level batch buffer
1176 // To be moved to a more proper place later
1177 MOS_ZeroMemory(&m_thirdLevelBatchBuffer, sizeof(m_thirdLevelBatchBuffer));
1178 m_thirdLevelBatchBuffer.bSecondLevel = true;
1179 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1180 m_osInterface,
1181 &m_thirdLevelBatchBuffer,
1182 nullptr,
1183 m_thirdLBSize));
1184
1185 if (m_enableTileStitchByHW)
1186 {
1187 if (Mos_ResourceIsNull(&m_resHucStatus2Buffer))
1188 {
1189 // HUC STATUS 2 Buffer for HuC status check in COND_BB_END
1190 allocParamsForBufferLinear.dwBytes = sizeof(uint64_t);
1191 allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer";
1192 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
1193 m_osInterface->pfnAllocateResource(
1194 m_osInterface,
1195 &allocParamsForBufferLinear,
1196 &m_resHucStatus2Buffer),
1197 "%s: Failed to allocate HUC STATUS 2 Buffer\n",
1198 __FUNCTION__);
1199 }
1200 uint8_t *data;
1201 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1202 {
1203 for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1204 {
1205 // HuC stitching Data buffer
1206 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandDataVdencG12), CODECHAL_PAGE_SIZE);
1207 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
1208 CODECHAL_ENCODE_CHK_STATUS_RETURN(
1209 m_osInterface->pfnAllocateResource(
1210 m_osInterface,
1211 &allocParamsForBufferLinear,
1212 &m_resHucStitchDataBuffer[i][j]));
1213 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1214 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1215 lockFlagsWriteOnly.WriteOnly = 1;
1216 uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
1217 m_osInterface,
1218 &m_resHucStitchDataBuffer[i][j],
1219 &lockFlagsWriteOnly);
1220 CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
1221 MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
1222 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
1223 }
1224 }
1225 //Second level BB for huc stitching cmd
1226 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
1227 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
1228 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1229 m_osInterface,
1230 &m_HucStitchCmdBatchBuffer,
1231 nullptr,
1232 m_hwInterface->m_HucStitchCmdBatchBufferSize));
1233 }
1234
1235 if (m_numDelay)
1236 {
1237 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1238 allocParamsForBufferLinear.pBufName = "DelayMinusMemory";
1239
1240 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1241 m_osInterface,
1242 &allocParamsForBufferLinear,
1243 &m_resDelayMinus), "Failed to allocate delay minus memory.");
1244
1245 uint8_t* data;
1246 MOS_LOCK_PARAMS lockFlags;
1247 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1248 lockFlags.WriteOnly = 1;
1249 data = (uint8_t*)m_osInterface->pfnLockResource(
1250 m_osInterface,
1251 &m_resDelayMinus,
1252 &lockFlags);
1253
1254 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1255
1256 MOS_ZeroMemory(data, sizeof(uint32_t));
1257
1258 m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus);
1259 }
1260
1261 return eStatus;
1262 }
1263
FreePakResources()1264 MOS_STATUS CodechalVdencHevcStateG12::FreePakResources()
1265 {
1266 CODECHAL_ENCODE_FUNCTION_ENTER;
1267
1268 m_osInterface->pfnFreeResource(m_osInterface, &m_resSseSrcPixelRowStoreBuffer);
1269 m_osInterface->pfnFreeResource(m_osInterface, &m_resHcpScalabilitySyncBuffer.sResource);
1270 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencSAORowStoreBuffer);
1271 m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource);
1272 if (!Mos_ResourceIsNull(&m_resHwCountTileReplay))
1273 {
1274 m_osInterface->pfnFreeResource(m_osInterface, &m_resHwCountTileReplay);
1275 }
1276
1277 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++)
1278 {
1279 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource);
1280 }
1281 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
1282 {
1283 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource);
1284 }
1285 m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
1286
1287 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer);
1288
1289 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1290 {
1291 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1292 {
1293 m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
1294 }
1295 }
1296
1297 if (m_numDelay)
1298 {
1299 m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus);
1300 }
1301
1302 for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1303 {
1304 MOS_FreeMemory(m_tileParams[i]);
1305 }
1306
1307 // command buffer for VE, allocated in MOS_STATUS CodechalEncodeHevcBase::VerifyCommandBufferSize()
1308 for (auto i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++)
1309 {
1310 for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++)
1311 {
1312 for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++)
1313 {
1314 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
1315
1316 if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
1317 {
1318 if (cmdBuffer->pCmdBase)
1319 {
1320 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
1321 }
1322 m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
1323 }
1324 }
1325 }
1326 }
1327
1328 for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++)
1329 {
1330 auto sync = &m_refSync[i];
1331
1332 if (!Mos_ResourceIsNull(&sync->resSyncObject))
1333 {
1334 // if this object has been signaled before, we need to wait to ensure singal-wait is in pair.
1335 if (sync->uiSemaphoreObjCount || sync->bInUsed)
1336 {
1337 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
1338 syncParams.GpuContext = m_renderContext;
1339 syncParams.presSyncResource = &sync->resSyncObject;
1340 syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount;
1341 m_osInterface->pfnEngineWait(m_osInterface, &syncParams);
1342 }
1343 }
1344 m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource);
1345 }
1346
1347 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resVdBoxSemaphoreMem); i++)
1348 {
1349 m_osInterface->pfnFreeResource(m_osInterface, &m_resVdBoxSemaphoreMem[i].sResource);
1350 }
1351
1352 for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resPipeStartSemaMem); i++)
1353 {
1354 m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem[i].sResource);
1355 }
1356
1357 if (m_enableTileStitchByHW)
1358 {
1359 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
1360 {
1361 for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++)
1362 {
1363 // HuC stitching Data buffer
1364 m_osInterface->pfnFreeResource(
1365 m_osInterface,
1366 &m_resHucStitchDataBuffer[i][j]);
1367 }
1368 }
1369 //Second level BB for huc stitching cmd
1370 Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
1371 }
1372
1373 Mhw_FreeBb(m_osInterface, &m_thirdLevelBatchBuffer, nullptr);
1374 FreeTileLevelBatch();
1375 FreeTileRowLevelBRCBatch();
1376
1377 m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcPakSemaphoreMem.sResource);
1378
1379 return CodechalVdencHevcState::FreePakResources();
1380 }
1381
AllocateEncResources()1382 MOS_STATUS CodechalVdencHevcStateG12::AllocateEncResources()
1383 {
1384 CODECHAL_ENCODE_FUNCTION_ENTER;
1385
1386 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1387
1388 //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateEncResources());
1389
1390 if (m_hmeSupported)
1391 {
1392 HmeParams hmeParams;
1393
1394 MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1395 hmeParams.b4xMeDistortionBufferSupported = true;
1396 hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer;
1397 hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer;
1398 hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1399 hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer;
1400 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources4xME(&hmeParams));
1401 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources16xME(&hmeParams));
1402 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources32xME(&hmeParams));
1403 }
1404
1405 // VDENC tile row store buffer
1406 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1407 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1408 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1409 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1410 allocParamsForBufferLinear.Format = Format_Buffer;
1411 allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
1412 allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
1413
1414 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1415 m_osInterface,
1416 &allocParamsForBufferLinear,
1417 &m_vdencTileRowStoreBuffer),
1418 "Failed to allocate VDENC Tile Row Store Buffer");
1419
1420 MOS_ALLOC_GFXRES_PARAMS allocParamsForSurface;
1421 MOS_ZeroMemory(&allocParamsForSurface, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1422 allocParamsForSurface.Type = MOS_GFXRES_BUFFER;
1423 allocParamsForSurface.TileType = MOS_TILE_LINEAR;
1424 allocParamsForSurface.Format = Format_Buffer;
1425 allocParamsForSurface.dwBytes = m_numLcu * 4;
1426 allocParamsForSurface.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
1427
1428 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1429 m_osInterface,
1430 &allocParamsForSurface,
1431 &m_vdencCumulativeCuCountStreamoutSurface),
1432 "Failed to allocate VDEnc Cumulative CU Count Streamout Surface");
1433
1434 // Move from CodechalVdencHevcState::AllocateEncResources()
1435
1436 // PAK stream-out buffer
1437 allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_PAK_STREAMOUT_SIZE;
1438 allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer";
1439 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1440 m_osInterface,
1441 &allocParamsForBufferLinear,
1442 &m_resStreamOutBuffer[0]),
1443 "Failed to allocate Pak Stream Out Buffer.");
1444
1445 // VDENC Intra Row Store Scratch buffer
1446 // 1 cacheline per MB
1447 // Double the size for Tile Replay
1448 uint32_t size = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * CODECHAL_CACHELINE_SIZE * 2 * 2;
1449 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
1450 m_standard, size, 1, vdencIntraRowStoreScratch, "vdencIntraRowStoreScratch"));
1451
1452 // VDENC Statistics buffer
1453 // Enabled for BRC only
1454 size = MOS_ALIGN_CEIL(m_vdencBrcStatsBufferSize * m_maxTileNumber, CODECHAL_PAGE_SIZE);
1455 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
1456 m_standard, size, 1, vdencStats, "vdencStats"));
1457
1458 // end of CodechalVdencHevcState::AllocateEncResources()
1459
1460 if (m_enableSCC)
1461 {
1462 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1463 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
1464 uint32_t alignedWidth, alignedHeight;
1465
1466 // Allocate the recon not filtered surface for IBC
1467 // First align to LCU size 64x64
1468 alignedWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE);
1469 alignedHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE);
1470
1471 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1472 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
1473 allocParamsForBuffer2D.TileType = MOS_TILE_Y;
1474 // default setting
1475 allocParamsForBuffer2D.Format = Format_NV12;
1476 allocParamsForBuffer2D.pBufName = "Recon not Filtered Surface";
1477 allocParamsForBuffer2D.dwWidth = alignedWidth;
1478 allocParamsForBuffer2D.dwHeight = alignedHeight;
1479
1480 // The format and size is dependent on chroma format and bit depth
1481 CODECHAL_ENCODE_ASSERT(m_bitDepth < 12);
1482
1483 if (HCP_CHROMA_FORMAT_YUV420 == m_chromaFormat)
1484 {
1485 if (10 == m_bitDepth)
1486 {
1487 if (m_mmcState && m_mmcState->IsMmcEnabled())
1488 {
1489 allocParamsForBuffer2D.dwWidth = alignedWidth * 2;
1490 }
1491 else
1492 {
1493 allocParamsForBuffer2D.Format = Format_P010;
1494 }
1495 }
1496 }
1497 else if (HCP_CHROMA_FORMAT_YUV444 == m_chromaFormat)
1498 {
1499 if (8 == m_bitDepth)
1500 {
1501 allocParamsForBuffer2D.Format = Format_AYUV;
1502 allocParamsForBuffer2D.dwWidth = alignedWidth >> 2;
1503 allocParamsForBuffer2D.dwHeight = alignedHeight * 3;
1504 }
1505 else
1506 {
1507 allocParamsForBuffer2D.Format = Format_Y410;
1508 allocParamsForBuffer2D.dwWidth = alignedWidth >> 1;
1509 allocParamsForBuffer2D.dwHeight = alignedHeight * 3;
1510 }
1511 }
1512 else
1513 {
1514 CODECHAL_ENCODE_ASSERTMESSAGE("4:2:2 is not supported for SCC feature!");
1515 eStatus = MOS_STATUS_INVALID_PARAMETER;
1516 return eStatus;
1517 }
1518
1519 if (m_mmcState && m_mmcState->IsMmcEnabled())
1520 {
1521 allocParamsForBuffer2D.bIsCompressible = true;
1522 allocParamsForBuffer2D.CompressionMode = MOS_MMC_MC;
1523 }
1524 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1525 m_osInterface,
1526 &allocParamsForBuffer2D,
1527 &m_vdencRecNotFilteredBuffer),
1528 "Failed to allocate Recon not filtered surface for IBC");
1529 }
1530
1531 return eStatus;
1532 }
1533
FreeEncResources()1534 MOS_STATUS CodechalVdencHevcStateG12::FreeEncResources()
1535 {
1536 CODECHAL_ENCODE_FUNCTION_ENTER;
1537
1538 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencTileRowStoreBuffer);
1539 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencCumulativeCuCountStreamoutSurface);
1540
1541 // Free ME resources
1542 HmeParams hmeParams;
1543
1544 MOS_ZeroMemory(&hmeParams, sizeof(hmeParams));
1545 hmeParams.ps16xMeMvDataBuffer = &m_s16XMeMvDataBuffer;
1546 hmeParams.ps32xMeMvDataBuffer = &m_s32XMeMvDataBuffer;
1547 hmeParams.ps4xMeDistortionBuffer = &m_s4XMeDistortionBuffer;
1548 hmeParams.ps4xMeMvDataBuffer = &m_s4XMeMvDataBuffer;
1549 DestroyMEResources(&hmeParams);
1550
1551 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencRecNotFilteredBuffer);
1552
1553 return CodechalVdencHevcState::FreeEncResources();
1554 }
1555
AllocateBrcResources()1556 MOS_STATUS CodechalVdencHevcStateG12::AllocateBrcResources()
1557 {
1558 CODECHAL_ENCODE_FUNCTION_ENTER;
1559 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1560
1561 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::AllocateBrcResources());
1562
1563 uint32_t* data = nullptr;
1564 MOS_LOCK_PARAMS lockFlagsWriteOnly;
1565 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
1566 lockFlagsWriteOnly.WriteOnly = 1;
1567
1568 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1569 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1570 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1571 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1572 allocParamsForBufferLinear.Format = Format_Buffer;
1573 allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
1574 allocParamsForBufferLinear.pBufName = "TileRowBRCSyncSemaphore";
1575
1576 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1577 m_osInterface,
1578 &allocParamsForBufferLinear,
1579 &m_resTileRowBRCsyncSemaphore),
1580 "Failed to create Tile Row BRC sync Semaphore Memory.");
1581
1582 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint32_t *)m_osInterface->pfnLockResource(
1583 m_osInterface,
1584 &m_resTileRowBRCsyncSemaphore,
1585 &lockFlagsWriteOnly));
1586
1587 *data = 0;
1588
1589 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
1590 m_osInterface,
1591 &m_resTileRowBRCsyncSemaphore));
1592
1593 return eStatus;
1594 }
1595
FreeBrcResources()1596 MOS_STATUS CodechalVdencHevcStateG12::FreeBrcResources()
1597 {
1598 CODECHAL_ENCODE_FUNCTION_ENTER;
1599
1600 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileRowBRCsyncSemaphore);
1601 return CodechalVdencHevcState::FreeBrcResources();
1602 }
1603
AllocateTileLevelBatch()1604 MOS_STATUS CodechalVdencHevcStateG12::AllocateTileLevelBatch()
1605 {
1606 CODECHAL_ENCODE_FUNCTION_ENTER;
1607
1608 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1609
1610 // Only allocate when the number of tile changed
1611 if (m_numTileBatchAllocated >= m_numTiles)
1612 {
1613 return eStatus;
1614 }
1615
1616 // Make it simple, free first if need reallocate
1617 if (m_numTileBatchAllocated > 0)
1618 {
1619 CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeTileLevelBatch());
1620 }
1621
1622 // First allocate the batch buffer array
1623 for (int32_t idx = 0; idx < CODECHAL_VDENC_BRC_NUM_OF_PASSES; idx++)
1624 {
1625 if (m_tileLevelBatchBuffer[idx] == nullptr)
1626 {
1627 m_tileLevelBatchBuffer[idx] = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(sizeof(MHW_BATCH_BUFFER) * m_numTiles);
1628
1629 if (nullptr == m_tileLevelBatchBuffer[idx])
1630 {
1631 CODECHAL_ENCODE_ASSERTMESSAGE("Allocate memory for tile batch buffer failed");
1632 return MOS_STATUS_NO_SPACE;
1633 }
1634 }
1635
1636 // Allocate the batch buffer for each tile
1637 uint32_t i = 0;
1638 for (i = 0; i < m_numTiles; i++)
1639 {
1640 MOS_ZeroMemory(&m_tileLevelBatchBuffer[idx][i], sizeof(MHW_BATCH_BUFFER));
1641 m_tileLevelBatchBuffer[idx][i].bSecondLevel = true;
1642 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1643 m_osInterface,
1644 &m_tileLevelBatchBuffer[idx][i],
1645 nullptr,
1646 m_tileLevelBatchSize));
1647 }
1648 }
1649
1650 // Record the number of allocated batch buffer for tiles
1651 m_numTileBatchAllocated = m_numTiles;
1652 return eStatus;
1653 }
1654
FreeTileLevelBatch()1655 MOS_STATUS CodechalVdencHevcStateG12::FreeTileLevelBatch()
1656 {
1657 CODECHAL_ENCODE_FUNCTION_ENTER;
1658
1659 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1660
1661 // Free the batch buffer for each tile
1662 uint32_t i = 0;
1663 uint32_t j = 0;
1664 for (i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1665 {
1666 for (j = 0; j < m_numTileBatchAllocated; j++)
1667 {
1668 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_tileLevelBatchBuffer[i][j], nullptr));
1669 }
1670
1671 MOS_FreeMemory(m_tileLevelBatchBuffer[i]);
1672 m_tileLevelBatchBuffer[i] = nullptr;
1673 }
1674
1675 // Reset the number of tile batch allocated
1676 m_numTileBatchAllocated = 0;
1677
1678 return eStatus;
1679 }
1680
AllocateTileRowLevelBRCBatch()1681 MOS_STATUS CodechalVdencHevcStateG12::AllocateTileRowLevelBRCBatch()
1682 {
1683 CODECHAL_ENCODE_FUNCTION_ENTER;
1684
1685 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1686
1687 // Only allocate when the number of tile row changed
1688 if (m_numTileRowBRCBatchAllocated >= m_numTileRows)
1689 {
1690 return eStatus;
1691 }
1692
1693 // Make it simple, free first if need reallocate
1694 if (m_numTileRowBRCBatchAllocated > 0)
1695 {
1696 CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeTileRowLevelBRCBatch());
1697 }
1698
1699 // First allocate the batch buffer array
1700 for (int32_t idx = 0; idx < CODECHAL_VDENC_BRC_NUM_OF_PASSES; idx++)
1701 {
1702 if (m_TileRowBRCBatchBuffer[idx] == nullptr)
1703 {
1704 m_TileRowBRCBatchBuffer[idx] = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(sizeof(MHW_BATCH_BUFFER) * m_numTileRows);
1705
1706 if (nullptr == m_TileRowBRCBatchBuffer[idx])
1707 {
1708 CODECHAL_ENCODE_ASSERTMESSAGE("Allocate memory for tile row level BRC batch buffer failed");
1709 return MOS_STATUS_NO_SPACE;
1710 }
1711 }
1712
1713 // Allocate the batch buffer for each tile row
1714 uint32_t i = 0;
1715 for (i = 0; i < m_numTileRows; i++)
1716 {
1717 MOS_ZeroMemory(&m_TileRowBRCBatchBuffer[idx][i], sizeof(MHW_BATCH_BUFFER));
1718 m_TileRowBRCBatchBuffer[idx][i].bSecondLevel = true;
1719 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1720 m_osInterface,
1721 &m_TileRowBRCBatchBuffer[idx][i],
1722 nullptr,
1723 m_hwInterface->m_hucCommandBufferSize));
1724 }
1725 }
1726
1727 // Record the number of allocated batch buffer for tiles
1728 m_numTileRowBRCBatchAllocated = m_numTileRows;
1729 return eStatus;
1730 }
1731
FreeTileRowLevelBRCBatch()1732 MOS_STATUS CodechalVdencHevcStateG12::FreeTileRowLevelBRCBatch()
1733 {
1734 CODECHAL_ENCODE_FUNCTION_ENTER;
1735
1736 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1737
1738 // Free the batch buffer for each tile row
1739 uint32_t i = 0;
1740 uint32_t j = 0;
1741 for (i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1742 {
1743 for (j = 0; j < m_numTileRowBRCBatchAllocated; j++)
1744 {
1745 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_TileRowBRCBatchBuffer[i][j], nullptr));
1746 }
1747
1748 MOS_FreeMemory(m_TileRowBRCBatchBuffer[i]);
1749 m_TileRowBRCBatchBuffer[i] = nullptr;
1750 }
1751
1752 // Reset the number of tile row BRC batch allocated
1753 m_numTileRowBRCBatchAllocated = 0;
1754
1755 return eStatus;
1756 }
1757
InitializePicture(const EncoderParams & params)1758 MOS_STATUS CodechalVdencHevcStateG12::InitializePicture(const EncoderParams& params)
1759 {
1760 CODECHAL_ENCODE_FUNCTION_ENTER;
1761
1762 m_numNAL = params.uiNumNalUnits;
1763 m_overallNALPayload = params.uiOverallNALPayload;
1764
1765 // common initilization
1766 return CodechalVdencHevcState::InitializePicture(params);
1767 }
1768
SetSequenceStructs()1769 MOS_STATUS CodechalVdencHevcStateG12::SetSequenceStructs()
1770 {
1771 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1772
1773 CODECHAL_ENCODE_FUNCTION_ENTER;
1774
1775 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1776 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1777 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1778 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1779 allocParamsForBufferLinear.Format = Format_Buffer;
1780 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
1781 allocParamsForBufferLinear.pBufName = "VDENC Read Batch Buffer";
1782
1783 uint32_t batchBufferSize = m_hwInterface->m_vdencReadBatchBufferSize +
1784 ENCODE_HEVC_VDENC_NUM_MAX_SLICES * (m_numNAL * mhw_vdbox_hcp_g12_X::HCP_PAK_INSERT_OBJECT_CMD::byteSize + m_overallNALPayload);
1785
1786 if (batchBufferSize > allocParamsForBufferLinear.dwBytes && allocParamsForBufferLinear.dwBytes != m_prevVdencReadBatchBufferSize)
1787 {
1788 m_hwInterface->m_vdencReadBatchBufferSize = batchBufferSize;
1789 m_hwInterface->m_vdenc2ndLevelBatchBufferSize = batchBufferSize;
1790 m_tileLevelBatchSize = batchBufferSize;
1791 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
1792 m_prevVdencReadBatchBufferSize = allocParamsForBufferLinear.dwBytes;
1793
1794 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
1795 {
1796 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1797 {
1798 if (!Mos_ResourceIsNull(&m_vdencReadBatchBuffer[k][i]))
1799 {
1800 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencReadBatchBuffer[k][i]);
1801 }
1802 }
1803
1804 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
1805 {
1806 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1807 m_osInterface,
1808 &allocParamsForBufferLinear,
1809 &m_vdencReadBatchBuffer[k][i]),
1810 "Failed to allocate VDENC Read Batch Buffer");
1811 }
1812 }
1813 }
1814
1815 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetSequenceStructs());
1816
1817 return eStatus;
1818 }
1819
SetPictureStructs()1820 MOS_STATUS CodechalVdencHevcStateG12::SetPictureStructs()
1821 {
1822 CODECHAL_ENCODE_FUNCTION_ENTER;
1823
1824 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1825
1826 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetPictureStructs());
1827
1828 if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat &&
1829 (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat)
1830 {
1831 if (Format_YUY2 != m_reconSurface.Format)
1832 {
1833 eStatus = MOS_STATUS_INVALID_PARAMETER;
1834 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface format is not correct!");
1835 }
1836 else if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 ||
1837 m_reconSurface.dwWidth < m_oriFrameWidth / 2)
1838 {
1839 eStatus = MOS_STATUS_INVALID_PARAMETER;
1840 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Recon surface allocation size is not correct!");
1841 }
1842 else
1843 {
1844 // update Recon surface to Variant format
1845 CodechalEncodeHevcBase::UpdateYUY2SurfaceInfo(&m_reconSurface, m_is10BitHevc);
1846 }
1847 }
1848
1849 // Frame level BRC pass set to one pass when tile replay is enabled
1850 if (m_enableTileReplay)
1851 {
1852 m_numPasses = 0;
1853 }
1854
1855 // Error concealment, disable IBC if slice coding type is I type
1856 if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
1857 {
1858 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
1859 {
1860 if (m_hevcSliceParams[slcCount].slice_type == CODECHAL_ENCODE_HEVC_I_SLICE)
1861 {
1862 m_hevcPicParams->pps_curr_pic_ref_enabled_flag = false;
1863 break;
1864 }
1865 }
1866 }
1867
1868 // EOS is not working on GEN12, disable it by setting below to false (WA)
1869 m_lastPicInSeq = false;
1870 m_lastPicInStream = false;
1871 return eStatus;
1872 }
1873
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)1874 MOS_STATUS CodechalVdencHevcStateG12::GetStatusReport(
1875 EncodeStatus *encodeStatus,
1876 EncodeStatusReport *encodeStatusReport)
1877 {
1878 CODECHAL_ENCODE_FUNCTION_ENTER;
1879
1880 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1881
1882 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
1883 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
1884
1885 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpVdencOutputs()));
1886
1887 // When tile replay is enabled with tile replay, need to report out the tile size and the bit stream is not continous
1888 if ((encodeStatusReport->UsedVdBoxNumber == 1) && (!m_enableTileReplay || (m_enableTileReplay && encodeStatusReport->NumberTilesInFrame == 1)))
1889 {
1890 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport));
1891 return eStatus;
1892 }
1893
1894 // Allocate the tile size report memory
1895 encodeStatusReport->SizeOfTileInfoBuffer = encodeStatusReport->NumberTilesInFrame * sizeof(CodechalTileInfo);
1896 if (encodeStatusReport->pHEVCTileinfo)
1897 {
1898 MOS_FreeMemory(encodeStatusReport->pHEVCTileinfo);
1899 encodeStatusReport->pHEVCTileinfo = nullptr;
1900 }
1901 encodeStatusReport->pHEVCTileinfo = (CodechalTileInfo *)MOS_AllocAndZeroMemory(encodeStatusReport->SizeOfTileInfoBuffer);
1902 CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport->pHEVCTileinfo);
1903
1904 // In case of CQP, PAK integration kernel is not called, so used tile size record from HW
1905 // PAK integration kernel does not handle stitching for single pipe mode
1906 PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
1907 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[encodeStatusReport->CurrOriginalPic.FrameIdx];
1908
1909 MOS_LOCK_PARAMS lockFlags;
1910 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1911 lockFlags.ReadOnly = 1;
1912 HCPPakHWTileSizeRecord_G12* tileStatusReport = (HCPPakHWTileSizeRecord_G12*)m_osInterface->pfnLockResource(
1913 m_osInterface,
1914 &tileSizeStatusReport->sResource,
1915 &lockFlags);
1916 CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
1917
1918 encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
1919 encodeStatusReport->PanicMode = false;
1920 encodeStatusReport->AverageQp = 0;
1921 encodeStatusReport->QpY = 0;
1922 encodeStatusReport->SuggestedQpYDelta = 0;
1923 encodeStatusReport->NumberPasses = 1;
1924 encodeStatusReport->bitstreamSize = 0;
1925 encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
1926 encodeStatusReport->NumberSlices = 0;
1927
1928 uint32_t* sliceSize = nullptr;
1929
1930 // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
1931 if (encodeStatus->sliceReport.pSliceSize)
1932 {
1933 sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
1934 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
1935 }
1936
1937 uint32_t totalCU = 0;
1938 uint32_t sliceCount = 0;
1939 double sumQp = 0.0;
1940 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
1941 {
1942 if (tileStatusReport[i].Length == 0)
1943 {
1944 encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
1945 return eStatus;
1946 }
1947 //update tile info with HW counter
1948 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
1949 {
1950 if (Mos_ResourceIsNull(&m_resHwCountTileReplay))
1951 {
1952 CODECHAL_ENCODE_ASSERTMESSAGE("m_resHwCountTileReplay is not allocated");
1953 return MOS_STATUS_NULL_POINTER;
1954 }
1955
1956 MOS_LOCK_PARAMS LockFlagsNoOverWrite;
1957 MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
1958 LockFlagsNoOverWrite.WriteOnly = 1;
1959 LockFlagsNoOverWrite.NoOverWrite = 1;
1960
1961 uint8_t* dataHWCountTileReplay = (uint8_t*)m_osInterface->pfnLockResource(
1962 m_osInterface,
1963 &m_resHwCountTileReplay,
1964 &LockFlagsNoOverWrite);
1965
1966 CODECHAL_ENCODE_CHK_NULL_RETURN(dataHWCountTileReplay);
1967 uint64_t *pAddress2Counter = (uint64_t *)(dataHWCountTileReplay + i * sizeof(HwCounter));
1968 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count = *pAddress2Counter;
1969 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count = SwapEndianness(encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count); //Report back in Big endian
1970 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV = *(++pAddress2Counter);
1971 encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV = SwapEndianness(encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV); //Report back in Big endian
1972 CODECHAL_ENCODE_NORMALMESSAGE("tile = %d, hwCounterValue.Count = 0x%llx, hwCounterValue.IV = 0x%llx", i, encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.Count, encodeStatusReport->pHEVCTileinfo[i].HWCounterValue.IV);
1973 if (dataHWCountTileReplay)
1974 {
1975 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHwCountTileReplay);
1976 }
1977 }
1978 encodeStatusReport->pHEVCTileinfo[i].TileSizeInBytes = tileStatusReport[i].Length;
1979 // The offset only valid if there is no stream stitching
1980 encodeStatusReport->pHEVCTileinfo[i].TileBitStreamOffset = tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
1981 encodeStatusReport->pHEVCTileinfo[i].TileRowNum = i / tileParams[i].NumOfTileColumnsInFrame;
1982 encodeStatusReport->pHEVCTileinfo[i].TileColNum = i % tileParams[i].NumOfTileColumnsInFrame;
1983 encodeStatusReport->NumTileReported = i + 1;
1984 encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
1985 totalCU += (tileParams[i].TileHeightInMinCbMinus1 + 1) * (tileParams[i].TileWidthInMinCbMinus1 + 1);
1986 sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
1987
1988 if (sliceSize)
1989 {
1990 encodeStatusReport->pSliceSizes = (uint16_t*)sliceSize;
1991 encodeStatusReport->NumberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile;
1992 uint16_t prevCumulativeSliceSize = 0;
1993 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
1994 for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++)
1995 {
1996 // PAK output the sliceSize at 16DW intervals.
1997 CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
1998
1999 //convert cummulative slice size to individual, first slice may have PPS/SPS,
2000 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
2001 encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
2002 prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
2003 sliceCount++;
2004 }
2005 }
2006 }
2007
2008 if (sliceSize)
2009 {
2010 encodeStatusReport->SizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatusReport->NumberSlices;
2011 encodeStatusReport->SliceSizeOverflow = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
2012 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
2013 }
2014
2015 CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport));
2016
2017 if (encodeStatusReport->bitstreamSize == 0 ||
2018 encodeStatusReport->bitstreamSize >m_bitstreamUpperBound)
2019 {
2020 encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
2021 encodeStatusReport->bitstreamSize = 0;
2022 return MOS_STATUS_INVALID_FILE_SIZE;
2023 }
2024
2025 if (totalCU != 0)
2026 {
2027 encodeStatusReport->QpY = encodeStatusReport->AverageQp =
2028 (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
2029 }
2030 else
2031 {
2032 return MOS_STATUS_INVALID_PARAMETER;
2033 }
2034
2035 if (m_enableTileStitchByHW)
2036 {
2037 if (tileStatusReport)
2038 {
2039 // clean-up the tile status report buffer
2040 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
2041 m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
2042 }
2043 return eStatus;
2044 }
2045
2046 //Driver stitching is not allowed for secure encode case
2047 if (!m_osInterface->osCpInterface->IsCpEnabled())
2048 {
2049 uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
2050 tempBsBuffer = bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
2051 CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
2052
2053 PCODEC_REF_LIST currRefList = encodeStatus->encodeStatusReport.pCurrRefList;
2054 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2055 lockFlags.ReadOnly = 1;
2056 uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
2057 m_osInterface,
2058 &currRefList->resBitstreamBuffer,
2059 &lockFlags);
2060
2061 if (bitstream == nullptr)
2062 {
2063 MOS_SafeFreeMemory(tempBsBuffer);
2064 return MOS_STATUS_NULL_POINTER;
2065 }
2066
2067 for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
2068 {
2069 uint32_t offset = tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
2070 uint32_t len = tileStatusReport[i].Length;
2071
2072 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
2073 bufPtr += len;
2074 }
2075
2076 MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
2077 MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize], m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
2078
2079 if (bitstream)
2080 {
2081 m_osInterface->pfnUnlockResource(m_osInterface, &currRefList->resBitstreamBuffer);
2082 }
2083
2084 MOS_FreeMemory(tempBsBuffer);
2085 }
2086
2087 if (tileStatusReport)
2088 {
2089 // clean-up the tile status report buffer
2090 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame);
2091
2092 m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource);
2093 }
2094
2095 return eStatus;
2096 }
2097
ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)2098 MOS_STATUS CodechalVdencHevcStateG12::ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)
2099 {
2100 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2101 bool isRandomAccess = false;
2102
2103 CODECHAL_ENCODE_CHK_NULL_RETURN(slcParams);
2104
2105 if (slcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
2106 {
2107 if (slcParams->num_ref_idx_l0_active_minus1 != slcParams->num_ref_idx_l1_active_minus1)
2108 {
2109 isRandomAccess = true;
2110 }
2111
2112 for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
2113 {
2114 if (slcParams->RefPicList[0][j].PicEntry != slcParams->RefPicList[1][j].PicEntry)
2115 {
2116 isRandomAccess = true;
2117 }
2118 }
2119 }
2120
2121 if (isRandomAccess)
2122 {
2123 if (m_hevcPicParams->bEnableRollingIntraRefresh)
2124 {
2125 CODECHAL_ENCODE_ASSERT(false);
2126 eStatus = MOS_STATUS_INVALID_PARAMETER;
2127 }
2128 }
2129
2130 if (isRandomAccess && m_enableSCC)
2131 {
2132 CODECHAL_ENCODE_ASSERT(false);
2133 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
2134 }
2135
2136 uint8_t maxNumRef0 = isRandomAccess ? 2 : m_numMaxVdencL0Ref;
2137 uint8_t maxNumRef1 = isRandomAccess ? 1 : m_numMaxVdencL1Ref;
2138
2139 if (slcParams->num_ref_idx_l0_active_minus1 > maxNumRef0 - 1)
2140 {
2141 CODECHAL_ENCODE_ASSERT(false);
2142 slcParams->num_ref_idx_l0_active_minus1 = maxNumRef0 - 1;
2143 }
2144
2145 if (slcParams->num_ref_idx_l1_active_minus1 > maxNumRef1 - 1)
2146 {
2147 CODECHAL_ENCODE_ASSERT(false);
2148 slcParams->num_ref_idx_l1_active_minus1 = maxNumRef1 - 1;
2149 }
2150
2151 return eStatus;
2152 }
2153
UserFeatureKeyReport()2154 MOS_STATUS CodechalVdencHevcStateG12::UserFeatureKeyReport()
2155 {
2156 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2157
2158 CODECHAL_ENCODE_FUNCTION_ENTER;
2159
2160 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::UserFeatureKeyReport());
2161
2162 #if (_DEBUG || _RELEASE_INTERNAL)
2163 CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value, m_osInterface->pOsContext);
2164 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
2165 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface), m_osInterface->pOsContext);
2166 #endif
2167 return eStatus;
2168 }
2169
EncodeKernelFunctions()2170 MOS_STATUS CodechalVdencHevcStateG12::EncodeKernelFunctions()
2171 {
2172 CODECHAL_ENCODE_FUNCTION_ENTER;
2173
2174 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2175
2176 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
2177 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
2178 m_rawSurfaceToEnc,
2179 CodechalDbgAttr::attrEncodeRawInputSurface,
2180 "SrcSurf")));
2181
2182 CODECHAL_DEBUG_TOOL(
2183 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
2184 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
2185 {
2186 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
2187
2188 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2189 {
2190 // L0 references
2191 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2192 m_debugInterface->m_refIndex = (uint16_t)m_refList[refPicIdx]->iFieldOrderCnt[0];
2193 std::string refSurfName = "RefSurf_List0_POC" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
2194 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
2195 &m_refList[refPicIdx]->sRefBuffer,
2196 CodechalDbgAttr::attrReferenceSurfaces,
2197 refSurfName.data()))
2198 }
2199 }
2200
2201 if (!m_lowDelay)
2202 {
2203 PCODEC_PICTURE l1RefFrameList = m_hevcSliceParams->RefPicList[LIST_1];
2204 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
2205 {
2206 CODEC_PICTURE refPic = l1RefFrameList[refIdx];
2207
2208 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2209 {
2210 // L1 references
2211 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2212 m_debugInterface->m_refIndex = (uint16_t)m_refList[refPicIdx]->iFieldOrderCnt[0];
2213 std::string refSurfName = "RefSurf_List1_POC" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
2214 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
2215 &m_refList[refPicIdx]->sRefBuffer,
2216 CodechalDbgAttr::attrReferenceSurfaces,
2217 refSurfName.data()))
2218 }
2219 }
2220 });
2221
2222 auto singleTaskPhaseSupported = m_singleTaskPhaseSupported; // local variable to save current setting before overwriting
2223
2224 if (m_16xMeSupported)
2225 {
2226 // Enable SingleTaskPhase for now with SHME
2227 m_singleTaskPhaseSupported = true;
2228 m_maxBtCount = GetMaxBtCount();
2229
2230 CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
2231 MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
2232
2233 cscScalingKernelParams.bLastTaskInPhaseCSC =
2234 cscScalingKernelParams.bLastTaskInPhase4xDS = false;
2235 cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
2236 cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
2237
2238 m_firstTaskInPhase = true;
2239 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->SetHevcCscFlagAndRawColor());
2240 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
2241
2242 CODECHAL_DEBUG_TOOL(
2243 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
2244 m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER),
2245 CodechalDbgAttr::attrReconstructedSurface,
2246 "4x_Scaled_Surf"));
2247
2248 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
2249 m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER),
2250 CodechalDbgAttr::attrReconstructedSurface,
2251 "16x_Scaled_Surf"));
2252
2253 if (m_b32XMeEnabled)
2254 {
2255 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
2256 m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER),
2257 CodechalDbgAttr::attrReconstructedSurface,
2258 "32x_Scaled_Surf"));
2259 }
2260 )
2261 }
2262
2263 if (m_b16XMeEnabled)
2264 {
2265 if (m_b32XMeEnabled)
2266 {
2267 //HME_P kernel for 32xME
2268 m_lastTaskInPhase = false;
2269 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_32x));
2270 }
2271
2272 //HME_P kernel for 16xME
2273 m_lastTaskInPhase = false;
2274 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_16x));
2275
2276 //StreamIn kernel, 4xME
2277 m_lastTaskInPhase = true;
2278 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x));
2279 }
2280
2281 // retrieve SingleTaskPhase setting (SAO will need STP enabled setting)
2282 m_singleTaskPhaseSupported = singleTaskPhaseSupported;
2283
2284 CODECHAL_DEBUG_TOOL(
2285 if (m_hmeEnabled) {
2286 CODECHAL_ME_OUTPUT_PARAMS meOutputParams;
2287
2288 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
2289 meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer;
2290 meOutputParams.psMeBrcDistortionBuffer = nullptr;
2291 meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer;
2292 meOutputParams.b16xMeInUse = false;
2293 meOutputParams.b32xMeInUse = false;
2294
2295 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2296 &meOutputParams.psMeMvBuffer->OsResource,
2297 CodechalDbgAttr::attrOutput,
2298 "MvData",
2299 meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
2300 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
2301 CODECHAL_MEDIA_STATE_4X_ME));
2302
2303 //CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2304 // &meOutputParams.psMeBrcDistortionBuffer->OsResource,
2305 // CodechalDbgAttr::attrOutput,
2306 // "BrcDist",
2307 // meOutputParams.psMeBrcDistortionBuffer->dwHeight *meOutputParams.psMeBrcDistortionBuffer->dwPitch,
2308 // CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4), 8) : 0,
2309 // CODECHAL_MEDIA_STATE_4X_ME));
2310 if (meOutputParams.psMeDistortionBuffer)
2311 {
2312 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2313 &meOutputParams.psMeDistortionBuffer->OsResource,
2314 CodechalDbgAttr::attrOutput,
2315 "MeDist",
2316 meOutputParams.psMeDistortionBuffer->dwHeight *meOutputParams.psMeDistortionBuffer->dwPitch,
2317 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
2318 CODECHAL_MEDIA_STATE_4X_ME));
2319 }
2320 if (m_b16XMeEnabled)
2321 {
2322 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
2323 meOutputParams.psMeMvBuffer = &m_s16XMeMvDataBuffer;
2324 meOutputParams.psMeBrcDistortionBuffer = nullptr;
2325 meOutputParams.psMeDistortionBuffer = nullptr;
2326 meOutputParams.b16xMeInUse = true;
2327 meOutputParams.b32xMeInUse = false;
2328
2329 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2330 m_debugInterface->DumpBuffer(
2331 &meOutputParams.psMeMvBuffer->OsResource,
2332 CodechalDbgAttr::attrOutput,
2333 "MvData",
2334 meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
2335 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
2336 CODECHAL_MEDIA_STATE_16X_ME));
2337 }
2338 if (m_b32XMeEnabled)
2339 {
2340 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
2341 meOutputParams.psMeMvBuffer = &m_s32XMeMvDataBuffer;
2342 meOutputParams.psMeBrcDistortionBuffer = nullptr;
2343 meOutputParams.psMeDistortionBuffer = nullptr;
2344 meOutputParams.b16xMeInUse = false;
2345 meOutputParams.b32xMeInUse = true;
2346
2347 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2348 m_debugInterface->DumpBuffer(
2349 &meOutputParams.psMeMvBuffer->OsResource,
2350 CodechalDbgAttr::attrOutput,
2351 "MvData",
2352 meOutputParams.psMeMvBuffer->dwHeight *meOutputParams.psMeMvBuffer->dwPitch,
2353 CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledFrameFieldHeightInMb32x * 4) : 0,
2354 CODECHAL_MEDIA_STATE_32X_ME));
2355 }
2356
2357 MOS_ZeroMemory(&meOutputParams, sizeof(meOutputParams));
2358 meOutputParams.pResVdenStreamInBuffer = &(m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
2359 meOutputParams.psMeMvBuffer = &m_s4XMeMvDataBuffer;
2360 meOutputParams.psMeDistortionBuffer = &m_s4XMeDistortionBuffer;
2361 meOutputParams.b16xMeInUse = false;
2362 meOutputParams.bVdencStreamInInUse = true;
2363 if (m_vdencStreamInEnabled) {
2364 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
2365 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
2366 CodechalDbgAttr::attrOutput,
2367 "StreaminData",
2368 m_picWidthInMb * m_picHeightInMb * CODECHAL_CACHELINE_SIZE,
2369 0,
2370 CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN));
2371 }
2372 })
2373 #endif
2374
2375 return eStatus;
2376 }
2377
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)2378 MOS_STATUS CodechalVdencHevcStateG12::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
2379 {
2380 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2381
2382 CODECHAL_ENCODE_FUNCTION_ENTER;
2383
2384 // Use FrameStats buffer if in single pipe mode.
2385 if (m_numPipe == 1)
2386 {
2387 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ReadSliceSize(cmdBuffer));
2388 return eStatus;
2389 }
2390
2391 // Report slice size to app only when dynamic scaling is enabled
2392 if (!m_hevcSeqParams->SliceSizeControl)
2393 {
2394 return eStatus;
2395 }
2396
2397 // In multi-tile multi-pipe mode, use PAK integration kernel output
2398 // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
2399 MOS_LOCK_PARAMS lockFlags;
2400 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2401 lockFlags.WriteOnly = true;
2402
2403 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2); // encodeStatus is offset by 2 DWs in the resource
2404 uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(m_numLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
2405
2406 if (IsFirstPipe())
2407 {
2408 if (IsFirstPass())
2409 {
2410 // Create/ Initialize slice report buffer once per frame, to be used across passes
2411 if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
2412 {
2413 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
2414 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
2415 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
2416 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
2417 allocParamsForBufferLinear.Format = Format_Buffer;
2418 allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
2419
2420 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
2421 m_osInterface,
2422 &allocParamsForBufferLinear,
2423 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
2424 "Failed to create HEVC VDEnc Slice Report Buffer ");
2425 }
2426
2427 // Clear slice size structure to be sent in EncodeStatusReport buffer
2428 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
2429 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2430 MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
2431 m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
2432
2433 // Set slice size pointer in slice size structure
2434 MHW_MI_FLUSH_DW_PARAMS miFlushDwParams;
2435 MOS_ZeroMemory(&miFlushDwParams, sizeof(miFlushDwParams));
2436 miFlushDwParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
2437 miFlushDwParams.dwResourceOffset = CODECHAL_OFFSETOF(EncodeStatusSliceReport, pSliceSize) + baseOffset + m_encodeStatusBuf.dwSliceReportOffset;
2438 miFlushDwParams.dwDataDW1 = (uint32_t)((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF);
2439 miFlushDwParams.dwDataDW2 = (uint32_t)(((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF00000000) >> 32);
2440 miFlushDwParams.bQWordEnable = 1;
2441 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
2442 cmdBuffer,
2443 &miFlushDwParams));
2444 }
2445
2446 // Copy Slize size data buffer from PAK to be sent back to App
2447 CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
2448 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
2449 m_hevcTileStatsOffset.uiHevcSliceStreamout,
2450 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex],
2451 0,
2452 sizeOfSliceSizesBuffer));
2453
2454 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
2455 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
2456 miCpyMemMemParams.presSrc = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
2457 miCpyMemMemParams.dwSrcOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics;
2458 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
2459 miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset; // Slice size overflow is at DW0 EncodeStatusSliceReport
2460 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
2461 }
2462
2463 return eStatus;
2464 }
2465
ExecutePictureLevel()2466 MOS_STATUS CodechalVdencHevcStateG12::ExecutePictureLevel()
2467 {
2468 CODECHAL_ENCODE_FUNCTION_ENTER;
2469
2470 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2471
2472 int32_t currentPass = GetCurrentPass();
2473 int32_t currentPipe = GetCurrentPipe();
2474
2475 if (IsFirstPipe() && IsFirstPass())
2476 {
2477 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams[m_virtualEngineBbIndex]));
2478 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics());
2479 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRoundingValues());
2480 }
2481
2482 if (m_hevcPicParams->bUsedAsRef || m_brcEnabled)
2483 {
2484 if (m_currRefSync == nullptr)
2485 {
2486 m_currRefSync = &m_refSync[m_currMbCodeIdx];
2487 }
2488 }
2489 else
2490 {
2491 m_currRefSync = nullptr;
2492 }
2493
2494 if (m_lookaheadPass && (m_hevcSeqParams->MaxAdaptiveGopPicSize > 0))
2495 {
2496 bool forceIntra = m_intraInterval >= m_hevcSeqParams->MaxAdaptiveGopPicSize;
2497 if ((!IsFirstPass() || forceIntra) && (m_hevcPicParams->CodingType != I_TYPE))
2498 {
2499 m_vdencStreamInEnabled = true;
2500 }
2501
2502 if (!m_lookaheadAdaptiveI)
2503 {
2504 m_intraInterval = forceIntra ? 1 : m_intraInterval + 1;
2505 }
2506 }
2507
2508 m_firstTaskInPhase = m_singleTaskPhaseSupported ? IsFirstPass() : false;
2509 m_lastTaskInPhase = m_singleTaskPhaseSupported ? IsLastPass() : true;
2510
2511 // Per frame maximum HuC kernels is 5 - BRC Init, BRC Update, PAK Int, BRC Update, PAK Int
2512 m_hucCommandsSize = m_hwInterface->m_hucCommandBufferSize * 5;
2513
2514 PerfTagSetting perfTag;
2515 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
2516
2517 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
2518 {
2519 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
2520 eStatus = MOS_STATUS_INVALID_PARAMETER;
2521 return eStatus;
2522 }
2523
2524 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
2525
2526 if (!m_singleTaskPhaseSupportedInPak)
2527 {
2528 // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
2529 m_firstTaskInPhase = true;
2530 m_lastTaskInPhase = true;
2531 }
2532
2533 if (m_lookaheadPass)
2534 {
2535 if (m_swLaMode != nullptr)
2536 {
2537 m_lastTaskInPhase = true;
2538 }
2539 else
2540 {
2541 m_lastTaskInPhase = !m_singleTaskPhaseSupported;
2542 }
2543 }
2544 else if (m_swBrcMode != nullptr)
2545 {
2546 m_lastTaskInPhase = !IsFirstPass();
2547 }
2548
2549 // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
2550 SetPakPassType();
2551
2552 bool pakOnlyMultipassEnable = false;
2553
2554 bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (IsLastPass()) && !m_pakOnlyPass;
2555
2556 uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
2557
2558 m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
2559 CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
2560
2561 // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
2562 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
2563 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
2564 {
2565 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
2566
2567 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2568 {
2569 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2570 m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
2571 }
2572 }
2573
2574 if (IsFirstPass())
2575 {
2576 MOS_COMMAND_BUFFER cmdBuffer;
2577 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2578 MHW_MI_MMIOREGISTERS mmioRegister;
2579 bool validMmio = m_hwInterface->GetMfxInterface()->ConvertToMiRegister(m_vdboxIndex, mmioRegister);
2580 if (validMmio)
2581 {
2582 HalOcaInterface::On1stLevelBBStart(
2583 cmdBuffer,
2584 *m_hwInterface->GetOsInterface()->pOsContext,
2585 m_hwInterface->GetOsInterface()->CurrentGpuContextHandle,
2586 *m_hwInterface->GetMiInterface(),
2587 mmioRegister);
2588 }
2589 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2590 }
2591
2592 if (m_numPipe >= 2)
2593 {
2594 // Send Cmd Buffer Header for VE in last pipe only
2595 MOS_COMMAND_BUFFER cmdBuffer;
2596 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2597 bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
2598 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2599
2600 MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
2601 MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
2602 forceWakeupParams.bMFXPowerWellControl = true;
2603 forceWakeupParams.bMFXPowerWellControlMask = true;
2604 forceWakeupParams.bHEVCPowerWellControl = true;
2605 forceWakeupParams.bHEVCPowerWellControlMask = true;
2606 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
2607 &cmdBuffer,
2608 &forceWakeupParams));
2609
2610 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2611 }
2612 else if (IsFirstPass())
2613 {
2614 // Send force wake command for VDBOX
2615 MOS_COMMAND_BUFFER cmdBuffer;
2616 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2617
2618 MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
2619 MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
2620 forceWakeupParams.bMFXPowerWellControl = true;
2621 forceWakeupParams.bMFXPowerWellControlMask = true;
2622 forceWakeupParams.bHEVCPowerWellControl = true;
2623 forceWakeupParams.bHEVCPowerWellControlMask = true;
2624 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
2625 &cmdBuffer,
2626 &forceWakeupParams));
2627 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2628 }
2629
2630 if (m_numPipe >= 2 && IsFirstPass())
2631 {
2632 MOS_COMMAND_BUFFER cmdBuffer;
2633 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2634
2635 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer));
2636
2637 //HW Semaphore cmd to make sure all pipes start encode at the same time
2638 for (uint32_t i = 0; i < m_numPipe; i++)
2639 {
2640 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(
2641 &m_resPipeStartSemaMem[i].sResource,
2642 1,
2643 MHW_MI_ATOMIC_INC,
2644 &cmdBuffer));
2645 }
2646 auto pipeNum = GetCurrentPipe();
2647 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2648 &m_resPipeStartSemaMem[pipeNum].sResource,
2649 &cmdBuffer,
2650 m_numPipe));
2651
2652 //clean HW semaphore memory
2653 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(
2654 &m_resPipeStartSemaMem[pipeNum].sResource,
2655 &cmdBuffer,
2656 0x0));
2657
2658 //Start Watchdog Timer
2659 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer));
2660
2661 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2662 }
2663
2664 if (m_vdencHucUsed && IsFirstPipe())
2665 {
2666 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2667 uint16_t callType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2668 if (m_singleTaskPhaseSupported)
2669 {
2670 perfTag.CallType = IsFirstPass() ? CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE : CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE_SECOND_PASS;
2671 }
2672 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, callType);
2673
2674 m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2675
2676 // Invoke BRC init/reset FW
2677 if (m_brcInit || m_brcReset)
2678 {
2679 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2680 }
2681
2682 if (!m_singleTaskPhaseSupported)
2683 {
2684 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
2685 }
2686
2687 // Invoke BRC update FW
2688 // When tile replay is enabled, BRC update is also called at tile row level
2689 if (m_enableTileReplay)
2690 {
2691 m_FrameLevelBRCForTileRow = true;
2692 m_TileRowLevelBRC = false;
2693 }
2694 else
2695 {
2696 m_FrameLevelBRCForTileRow = false;
2697 m_TileRowLevelBRC = false;
2698 }
2699 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2700
2701 m_brcInit = m_brcReset = false;
2702 }
2703
2704 MOS_COMMAND_BUFFER cmdBuffer;
2705 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2706
2707 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) && (m_numPipe == 1))
2708 {
2709 // Send command buffer header at the beginning (OS dependent)
2710 // frame tracking tag is only added in the last command buffer header
2711 bool requestFrameTracking = m_singleTaskPhaseSupported ?
2712 m_firstTaskInPhase :
2713 ((m_lookaheadPass && (!m_swLaMode || (m_currPass < m_numPasses))) ? false : m_lastTaskInPhase);
2714 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2715 }
2716
2717 // clean-up per VDBOX semaphore memory
2718 if (currentPipe < 0)
2719 {
2720 eStatus = MOS_STATUS_INVALID_PARAMETER;
2721 return eStatus;
2722 }
2723
2724 // Ensure the previous BRC Update is done, before executing PAK
2725 if (m_vdencHucUsed && (m_numPipe >= 2))
2726 {
2727 int32_t currentPass = GetCurrentPass() + 1;
2728 if (IsFirstPipe())
2729 {
2730 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(
2731 &m_resBrcPakSemaphoreMem.sResource,
2732 &cmdBuffer,
2733 currentPass));
2734 }
2735 else
2736 {
2737 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(
2738 &m_resBrcPakSemaphoreMem.sResource,
2739 &cmdBuffer,
2740 currentPass));
2741
2742 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(
2743 &m_resBrcPakSemaphoreMem.sResource,
2744 &cmdBuffer,
2745 0x0));
2746 }
2747 }
2748
2749 if ((!IsFirstPass()) && m_vdencHuCConditional2ndPass)
2750 {
2751 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2752
2753 // Insert conditional batch buffer end
2754 MOS_ZeroMemory(
2755 &miConditionalBatchBufferEndParams,
2756 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2757
2758 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2759 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2760 &m_resPakMmioBuffer;
2761
2762 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2763 &cmdBuffer,
2764 &miConditionalBatchBufferEndParams));
2765
2766 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2767 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2768 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2769
2770 // Write back the HCP image control register for RC6 may clean it out
2771 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2772 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2773 miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2774 miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2775 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2776 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2777
2778 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2779 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2780 miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2781 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2782 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2783 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2784
2785 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2786 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2787 miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2788 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2789 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2790 }
2791
2792 if (IsFirstPass() && m_osInterface->bTagResourceSync)
2793 {
2794 // This is a short term WA to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2795 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2796 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2797 // as long as Dec/VP/Enc won't depend on this PAK so soon.
2798
2799 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2800
2801 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2802 m_osInterface,
2803 globalGpuContextSyncTagBuffer));
2804 CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2805
2806 MHW_MI_STORE_DATA_PARAMS params;
2807 params.pOsResource = globalGpuContextSyncTagBuffer;
2808 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2809 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2810 params.dwValue = (value > 0) ? (value - 1) : 0;
2811 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
2812 }
2813
2814 if (IsFirstPipe())
2815 {
2816 if (IsFirstPass())
2817 {
2818 // Check other dependent VDBOXs if they are ready
2819 // The inter frame sync method was changed, remove this first, to be tuned
2820 // CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForVDBOX(&cmdBuffer));
2821
2822 // clean-up HW semaphore memory
2823 if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource))
2824 {
2825 // Ensure this semaphore is not used before. If yes, wait until it is done.
2826 // The inter frame sync method was changed, remove this first, to be tuned
2827 // CODECHAL_ENCODE_CHK_STATUS_RETURN(
2828 // SendHWWaitCommand(&pCurrRefSync->resSemaphoreMem.sResource, &cmdBuffer, 1));
2829
2830 MHW_MI_STORE_DATA_PARAMS storeDataParams;
2831 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
2832 storeDataParams.pOsResource = &m_currRefSync->resSemaphoreMem.sResource;
2833 storeDataParams.dwResourceOffset = 0;
2834 storeDataParams.dwValue = 0;
2835
2836 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
2837 &cmdBuffer,
2838 &storeDataParams));
2839 }
2840 }
2841
2842 if (!m_lookaheadPass || m_swLaMode)
2843 {
2844 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2845 }
2846 }
2847
2848 PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams());
2849 ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
2850
2851 auto release_func = [&]()
2852 {
2853 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
2854 pipeModeSelectParams = nullptr;
2855 };
2856
2857 SetHcpPipeModeSelectParams(*pipeModeSelectParams);
2858
2859 // HCP_PIPE_SELECT can not be generated by FW in BRC mode for GEN11+
2860 {
2861 MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS vdencControlStateParams;
2862 MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams;
2863
2864 //set up VDENC_CONTROL_STATE command
2865 {
2866 MOS_ZeroMemory(&vdencControlStateParams, sizeof(MHW_VDBOX_VDENC_CONTROL_STATE_PARAMS));
2867 vdencControlStateParams.bVdencInitialization = true;
2868 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(
2869 static_cast<MhwVdboxVdencInterfaceG12X*>(m_vdencInterface)->AddVdencControlStateCmd(&cmdBuffer, &vdencControlStateParams), release_func);
2870 }
2871
2872 //set up VD_CONTROL_STATE command
2873 {
2874 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
2875 vdControlStateParams.initialization = true;
2876 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(
2877 static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams), release_func);
2878 }
2879
2880 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), release_func);
2881 }
2882
2883 MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2884 SetHcpSrcSurfaceParams(srcSurfaceParams);
2885 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams), release_func);
2886
2887 MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams{};
2888 SetHcpReconSurfaceParams(reconSurfaceParams);
2889
2890 #ifdef _MMC_SUPPORTED
2891 // Recon P010v MMC state set from RC for compression write
2892 MOS_MEMCOMP_STATE tempMmcState = reconSurfaceParams.mmcState;
2893 if (m_reconSurface.Format == Format_P010 && MmcEnable(tempMmcState))
2894 {
2895 reconSurfaceParams.mmcState = MOS_MEMCOMP_RC;
2896 }
2897 #endif
2898 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams), release_func); //this is for Recon surf cmd set
2899
2900 MHW_VDBOX_SURFACE_PARAMS refSurfaceParams{};
2901 SetHcpRefSurfaceParams(refSurfaceParams); //it set MMC state and MMCFormat
2902
2903 // Add the surface state for reference picture, GEN12 HW change
2904 *m_pipeBufAddrParams = {};
2905 SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2906
2907 #ifdef _MMC_SUPPORTED
2908 if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
2909 {
2910 refSurfaceParams.mmcSkipMask = (1 << m_slotForRecNotFiltered); //add this for ref
2911 }
2912 #endif
2913
2914 if (m_mmcState->IsMmcEnabled())
2915 {
2916
2917 refSurfaceParams.refsMmcEnable = 0;
2918 refSurfaceParams.refsMmcType = 0;
2919 refSurfaceParams.dwCompressionFormat = 0;
2920
2921 //add for B frame support
2922 if (m_pictureCodingType != I_TYPE)
2923 {
2924 for (uint8_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
2925 {
2926 if (i < CODEC_MAX_NUM_REF_FRAME_HEVC &&
2927 m_picIdx[i].bValid && m_currUsedRefPic[i])
2928 {
2929 uint8_t idx = m_picIdx[i].ucPicIdx;
2930 uint8_t frameStoreId = m_refIdxMapping[i];
2931
2932 MOS_MEMCOMP_STATE mmcState = MOS_MEMCOMP_DISABLED;
2933 ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &mmcState));
2934 refSurfaceParams.refsMmcEnable |= (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC) ? (1 << frameStoreId) : 0;
2935 refSurfaceParams.refsMmcType |= (mmcState == MOS_MEMCOMP_RC) ? (1 << frameStoreId) : 0;
2936 if (mmcState == MOS_MEMCOMP_RC || mmcState == MOS_MEMCOMP_MC)
2937 {
2938 ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcFormat(const_cast<PMOS_SURFACE>(&m_refList[idx]->sRefReconBuffer), &refSurfaceParams.dwCompressionFormat));
2939 }
2940 }
2941 }
2942 }
2943 }
2944
2945 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &refSurfaceParams), release_func);
2946
2947 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer), release_func);
2948
2949 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2950 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2951 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams), release_func);
2952
2953 MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2954 SetHcpQmStateParams(fqmParams, qmParams);
2955 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams), release_func);
2956 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams), release_func);
2957
2958 SetVdencPipeModeSelectParams(*pipeModeSelectParams);
2959 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams), release_func);
2960
2961 MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2] = {};
2962 SetVdencSurfaceStateParams(srcSurfaceParams, refSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2963 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams), release_func);
2964 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &refSurfaceParams), release_func); // this is for Ref, no mmc related setting
2965 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2), release_func);
2966
2967 SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2968 m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2969 m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2970 #ifdef _MMC_SUPPORTED
2971 m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
2972 #endif
2973 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams), release_func);
2974
2975 MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
2976 SetHcpPicStateParams(picStateParams);
2977
2978 if (m_vdencHucUsed && (!m_hevcPicParams->tiles_enabled_flag))
2979 {
2980 // 2nd level batch buffer
2981 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2982 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0);
2983 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]), release_func);
2984
2985 // save offset for next 2nd level batch buffer usage
2986 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2987 }
2988 // When tile is enabled, below commands are needed for each tile instead of each picture
2989 else if (!m_hevcPicParams->tiles_enabled_flag)
2990 {
2991 SetAddCommands(CODECHAL_CMD1, &cmdBuffer, true, m_roundInterValue, m_roundIntraValue, m_lowDelay);
2992
2993 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPicStateCmd(&cmdBuffer, &picStateParams), release_func);
2994
2995 SetAddCommands(CODECHAL_CMD2, &cmdBuffer, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered);
2996 }
2997
2998 // Send HEVC_VP9_RDOQ_STATE command
2999 if (m_hevcRdoqEnabled && !m_hevcPicParams->tiles_enabled_flag)
3000 {
3001 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams), release_func);
3002 }
3003
3004 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(ReturnCommandBuffer(&cmdBuffer), release_func);
3005
3006 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3007
3008 return eStatus;
3009 }
3010
ExecuteSliceLevel()3011 MOS_STATUS CodechalVdencHevcStateG12::ExecuteSliceLevel()
3012 {
3013 CODECHAL_ENCODE_FUNCTION_ENTER;
3014
3015 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3016
3017 if (!m_hevcPicParams->tiles_enabled_flag)
3018 {
3019 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::ExecuteSliceLevel());
3020
3021 if (m_lookaheadPass)
3022 {
3023 CODECHAL_ENCODE_CHK_STATUS_RETURN(AnalyzeLookaheadStats());
3024
3025 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
3026 &m_vdencLaStatsBuffer,
3027 CodechalDbgAttr::attrVdencOutput,
3028 "_LookaheadStats",
3029 m_brcLooaheadStatsBufferSize,
3030 0,
3031 CODECHAL_NUM_MEDIA_STATES)));
3032 }
3033 }
3034 else
3035 {
3036 if (m_vdencHucUsed && m_enableTileReplay)
3037 {
3038 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncWithTileRowLevelBRC());
3039 }
3040 else
3041 {
3042 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel());
3043 }
3044 }
3045
3046 return eStatus;
3047 }
3048
EncTileLevel()3049 MOS_STATUS CodechalVdencHevcStateG12::EncTileLevel()
3050 {
3051 CODECHAL_ENCODE_FUNCTION_ENTER;
3052
3053 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3054
3055 int32_t currentPipe = GetCurrentPipe();
3056 int32_t currentPass = GetCurrentPass();
3057
3058 if (currentPipe < 0 || currentPass < 0)
3059 {
3060 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
3061 return MOS_STATUS_INVALID_PARAMETER;
3062 }
3063
3064 // Currently this implementation is only for CQP, single pass
3065 // Allocate more tile batch when try multiple passes
3066 if (IsFirstPass() && IsFirstPipe() && (!m_osInterface->bUsesPatchList))
3067 {
3068 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileLevelBatch());
3069 }
3070
3071 PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams());
3072 ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
3073
3074 auto release_func = [&]()
3075 {
3076 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3077 pipeModeSelectParams = nullptr;
3078 };
3079
3080 SetHcpPipeModeSelectParams(*pipeModeSelectParams);
3081 SetVdencPipeModeSelectParams(*pipeModeSelectParams);
3082
3083 MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
3084 SetHcpSliceStateCommonParams(sliceState);
3085
3086 MOS_COMMAND_BUFFER cmdBuffer;
3087 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(GetCommandBuffer(&cmdBuffer), release_func);
3088
3089 MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams;
3090 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
3091 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
3092 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
3093
3094 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
3095
3096 // Construct The third level batch buffer
3097 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(ConstructTLB(&m_thirdLevelBatchBuffer), release_func);
3098
3099 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
3100 {
3101 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
3102 {
3103 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
3104 uint32_t slcCount, idx, sliceNumInTile = 0;
3105
3106 idx = tileRow * numTileColumns + tileCol;
3107
3108 if ((m_numPipe > 1) && (tileCol != currentPipe))
3109 {
3110 continue;
3111 }
3112
3113 MOS_COMMAND_BUFFER tileBatchBuf;
3114 PMOS_COMMAND_BUFFER tempCmdBuf = &cmdBuffer;
3115 uint8_t *data = nullptr;
3116
3117 // Move tile level commands to first level command buffer when use patch list.
3118 if (!m_osInterface->bUsesPatchList)
3119 {
3120 MOS_LOCK_PARAMS lockFlags;
3121 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3122 lockFlags.WriteOnly = true;
3123
3124 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_tileLevelBatchBuffer[currentPass][idx].OsResource), &lockFlags);
3125 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(data, release_func);
3126
3127 MOS_ZeroMemory(&tileBatchBuf, sizeof(tileBatchBuf));
3128 tileBatchBuf.pCmdBase = tileBatchBuf.pCmdPtr = (uint32_t *)data;
3129 tileBatchBuf.iRemaining = m_tileLevelBatchSize;
3130
3131 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource, 0, true, 0);
3132 // Add batch buffer start for tile
3133 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_tileLevelBatchBuffer[currentPass][idx]), release_func);
3134
3135 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer), release_func);
3136
3137 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3138 {
3139 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface(), release_func);
3140
3141 // Lazy allocation
3142 if (Mos_ResourceIsNull(&m_resHwCountTileReplay))
3143 {
3144 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3145 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3146 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3147 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3148 allocParamsForBufferLinear.Format = Format_Buffer;
3149
3150 uint32_t maxTileRow = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
3151 uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
3152
3153 allocParamsForBufferLinear.dwBytes = maxTileRow*maxTileColumn*(sizeof(HwCounter));
3154 allocParamsForBufferLinear.pBufName = "HWCounter";
3155 allocParamsForBufferLinear.bIsPersistent = true;
3156 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_osInterface->pfnAllocateResource(
3157 m_osInterface,
3158 &allocParamsForBufferLinear,
3159 &m_resHwCountTileReplay),
3160 release_func);
3161 allocParamsForBufferLinear.bIsPersistent = false;
3162 }
3163
3164 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW(
3165 m_osInterface,
3166 &tileBatchBuf,
3167 &m_resHwCountTileReplay,
3168 (uint16_t)idx),
3169 release_func);
3170 }
3171
3172 tempCmdBuf = &tileBatchBuf;
3173 }
3174
3175 // Construct the tile batch
3176 // To be moved to one sub function later
3177 // HCP Lock for multiple pipe mode
3178 if (m_numPipe > 1)
3179 {
3180 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3181 vdControlStateParams.scalableModePipeLock = true;
3182 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(tempCmdBuf, &vdControlStateParams), release_func);
3183 }
3184 // VDENC_PIPE_MODE_SELECT
3185 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(tempCmdBuf, pipeModeSelectParams), release_func);
3186 // HCP_PIPE_MODE_SELECT
3187 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(tempCmdBuf, pipeModeSelectParams), release_func);
3188
3189 // 3rd level batch buffer
3190 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3191 {
3192 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0);
3193 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
3194 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(tempCmdBuf, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]), release_func);
3195 if (m_hevcRdoqEnabled)
3196 {
3197 MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
3198 SetHcpPicStateParams(picStateParams);
3199 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(tempCmdBuf, &picStateParams), release_func);
3200 }
3201 }
3202 else
3203 {
3204 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_thirdLevelBatchBuffer.OsResource, 0, true, 0);
3205 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(tempCmdBuf, &m_thirdLevelBatchBuffer), release_func);
3206 }
3207
3208 // HCP_TILE_CODING command
3209 // Set Tile replay related parameters
3210 tileParams[idx].IsFirstPass = IsFirstPass();
3211 tileParams[idx].IsLastPass = IsLastPass();
3212 tileParams[idx].bTileReplayEnable = m_enableTileReplay;
3213 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(tempCmdBuf, &tileParams[idx]), release_func);
3214
3215 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
3216 {
3217 bool lastSliceInTile = false, sliceInTile = false;
3218
3219 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(IsSliceInTile(slcCount,
3220 &tileParams[idx],
3221 &sliceInTile,
3222 &lastSliceInTile),
3223 release_func);
3224
3225 if (!sliceInTile)
3226 {
3227 continue;
3228 }
3229
3230 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3231 {
3232 // save offset for next 2nd level batch buffer usage
3233 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
3234 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
3235 // m_vdencBatchBufferPerSliceVarSize: variable size for each slice
3236
3237 // starting location for executing slice level cmds
3238 // To do: Improvize to only add current slice wSlcCount
3239 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
3240
3241 for (uint32_t j = 0; j < slcCount; j++)
3242 {
3243 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset
3244 += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]);
3245 }
3246
3247 }
3248
3249 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx);
3250
3251 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHwSliceEncodeCommand(tempCmdBuf, &sliceState), release_func);
3252
3253 // Send VD_PIPELINE_FLUSH command for each slice
3254 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3255 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
3256 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
3257 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
3258 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3259 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3260 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(tempCmdBuf, &vdPipelineFlushParams), release_func);
3261
3262 sliceNumInTile++;
3263 } // end of slice
3264
3265 if (0 == sliceNumInTile)
3266 {
3267 // One tile must have at least one slice
3268 CODECHAL_ENCODE_ASSERT(false);
3269 eStatus = MOS_STATUS_INVALID_PARAMETER;
3270 break;
3271 }
3272
3273 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
3274 {
3275 CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
3276 release_func();
3277 return MOS_STATUS_INVALID_PARAMETER;
3278 }
3279
3280 //HCP unLock for multiple pipe mode
3281 if (m_numPipe > 1)
3282 {
3283 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3284 vdControlStateParams.scalableModePipeUnlock = true;
3285 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(tempCmdBuf, &vdControlStateParams), release_func);
3286 }
3287
3288 // Send VD_PIPELINE_FLUSH command
3289 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3290 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3291 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3292 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3293 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(tempCmdBuf, &vdPipelineFlushParams), release_func);
3294
3295 // Send MI_FLUSH command
3296 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3297 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3298 flushDwParams.bVideoPipelineCacheInvalidate = true;
3299 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiFlushDwCmd(tempCmdBuf, &flushDwParams), release_func);
3300
3301 // Update head pointer for capture mode
3302 if (m_CaptureModeEnable && IsLastPipe())
3303 {
3304 MHW_MI_LOAD_REGISTER_IMM_PARAMS registerImmParams;
3305 MOS_ZeroMemory(®isterImmParams, sizeof(registerImmParams));
3306 registerImmParams.dwData = 1;
3307 registerImmParams.dwRegister = m_VdboxVDENCRegBase[currentPipe] + 0x90;
3308 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(tempCmdBuf, ®isterImmParams), release_func);
3309 }
3310
3311 if (!m_osInterface->bUsesPatchList)
3312 {
3313 // Add batch buffer end at the end of each tile batch, 2nd level batch buffer
3314 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferEnd(tempCmdBuf, nullptr), release_func);
3315
3316 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]_TILELEVEL";
3317 CODECHAL_DEBUG_TOOL(
3318 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_debugInterface->DumpCmdBuffer(
3319 tempCmdBuf,
3320 CODECHAL_NUM_MEDIA_STATES,
3321 pakPassName.data()),
3322 release_func);)
3323
3324 if (data)
3325 {
3326 m_osInterface->pfnUnlockResource(m_osInterface, &(m_tileLevelBatchBuffer[currentPass][idx].OsResource));
3327 }
3328 }
3329
3330 } // end of row tile
3331 } // end of column tile
3332
3333 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3334
3335 // Insert end of sequence/stream if set
3336 // To be moved to slice level?
3337 if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
3338 {
3339 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3340 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3341 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
3342 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
3343 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
3344 }
3345
3346 // Send VD_CONTROL_STATE (Memory Implicit Flush)
3347 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3348 vdControlStateParams.memoryImplicitFlush = true;
3349 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3350 static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams));
3351
3352
3353 // Send VD_PIPELINE_FLUSH command
3354 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3355 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3356 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3357 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3358 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
3359
3360 // Send MI_FLUSH command
3361 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3362 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3363 flushDwParams.bVideoPipelineCacheInvalidate = true;
3364 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3365
3366 // Set the HW semaphore to indicate current pipe done
3367 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3368 flushDwParams.bVideoPipelineCacheInvalidate = true;
3369 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
3370 {
3371 flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource;
3372 flushDwParams.dwDataDW1 = currentPass + 1;
3373 }
3374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3375
3376 if (IsFirstPipe())
3377 {
3378 // first pipe needs to ensure all other pipes are ready
3379 for (uint32_t i = 0; i < m_numPipe; i++)
3380 {
3381 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource))
3382 {
3383 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3384 SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource,
3385 &cmdBuffer,
3386 currentPass + 1));
3387 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3388 SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource,
3389 &cmdBuffer,
3390 0x0));
3391 }
3392 }
3393
3394 // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed.
3395 // ACQP/ BRC need PAK integration kernel to aggregate statistics
3396 if (m_vdencHucUsed)
3397 {
3398 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
3399 }
3400
3401 // Use HW stitch commands only in the scalable mode
3402 // For single pipe with tile replay, stitch also needed
3403 if (m_enableTileStitchByHW)
3404 {
3405 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP && !m_hevcVdencAcqpEnabled)
3406 {
3407 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrateStitch(&cmdBuffer));
3408 }
3409 // 2nd level BB buffer for stitching cmd
3410 // current location to add cmds in 2nd level batch buffer
3411 m_HucStitchCmdBatchBuffer.iCurrent = 0;
3412 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
3413 m_HucStitchCmdBatchBuffer.dwOffset = 0;
3414 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_HucStitchCmdBatchBuffer.OsResource, 0, true, 0);
3415 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
3416 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
3417 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
3418 }
3419
3420 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
3421
3422 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
3423
3424 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
3425
3426 if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
3427 {
3428 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
3429
3430 // BRC PAK statistics different for each pass
3431 if (m_brcEnabled)
3432 {
3433 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
3434 }
3435 }
3436
3437 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3438 // Signal HW semaphore for the reference frame dependency (i.e., current coding frame waits for the reference frame being ready)
3439 if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource))
3440 {
3441 // the reference frame semaphore must be set in each pass because of the conditional BRC batch buffer. Some BRC passes could be skipped.
3442 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3443 storeDataParams.pOsResource = &m_currRefSync->resSemaphoreMem.sResource;
3444 storeDataParams.dwResourceOffset = 0;
3445 storeDataParams.dwValue = 1;
3446
3447 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(
3448 &cmdBuffer,
3449 &storeDataParams));
3450 }
3451 }
3452
3453 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
3454 {
3455 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3456 }
3457
3458 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]";
3459 CODECHAL_DEBUG_TOOL(
3460 CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
3461 &cmdBuffer,
3462 CODECHAL_NUM_MEDIA_STATES,
3463 pakPassName.data()));)
3464
3465 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3466
3467 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
3468 {
3469 bool nullRendering = m_videoContextUsesNullHw;
3470
3471 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
3472
3473 CODECHAL_DEBUG_TOOL(
3474 CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers());
3475 if (m_mmcState)
3476 {
3477 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
3478 }
3479 )
3480
3481 if (IsFirstPipe() &&
3482 IsLastPass() &&
3483 m_signalEnc &&
3484 m_currRefSync &&
3485 !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
3486 {
3487 // signal semaphore
3488 MOS_SYNC_PARAMS syncParams;
3489 syncParams = g_cInitSyncParams;
3490 syncParams.GpuContext = m_videoContext;
3491 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
3492
3493 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
3494 m_currRefSync->uiSemaphoreObjCount++;
3495 m_currRefSync->bInUsed = true;
3496 }
3497 }
3498
3499 // Reset parameters for next PAK execution
3500 if (IsLastPipe() &&
3501 IsLastPass())
3502 {
3503 if (!m_singleTaskPhaseSupported)
3504 {
3505 m_osInterface->pfnResetPerfBufferID(m_osInterface);
3506 }
3507
3508 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
3509
3510 m_newPpsHeader = 0;
3511 m_newSeqHeader = 0;
3512 m_frameNum++;
3513 }
3514
3515 return eStatus;
3516 }
3517
EncWithTileRowLevelBRC()3518 MOS_STATUS CodechalVdencHevcStateG12::EncWithTileRowLevelBRC()
3519 {
3520 CODECHAL_ENCODE_FUNCTION_ENTER;
3521
3522 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3523
3524 int32_t currentPipe = GetCurrentPipe();
3525 int32_t currentPass = GetCurrentPass();
3526
3527 if (currentPipe < 0 || currentPass < 0)
3528 {
3529 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number");
3530 return MOS_STATUS_INVALID_PARAMETER;
3531 }
3532
3533 // Revisit the buffer reuse for multiple frames later
3534 if (IsFirstPass() && IsFirstPipe())
3535 {
3536 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileLevelBatch());
3537 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileRowLevelBRCBatch());
3538 }
3539
3540 PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams = dynamic_cast<PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12>(m_vdencInterface->CreateMhwVdboxPipeModeSelectParams());
3541 ENCODE_CHK_NULL_RETURN(pipeModeSelectParams);
3542
3543 auto release_func = [&]()
3544 {
3545 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3546 pipeModeSelectParams = nullptr;
3547 };
3548
3549 SetHcpPipeModeSelectParams(*pipeModeSelectParams);
3550 SetVdencPipeModeSelectParams(*pipeModeSelectParams);
3551
3552 MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
3553 SetHcpSliceStateCommonParams(sliceState);
3554
3555 MOS_COMMAND_BUFFER cmdBuffer;
3556 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(GetCommandBuffer(&cmdBuffer), release_func);
3557
3558 MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams;
3559 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
3560 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
3561
3562 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
3563 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
3564
3565 m_FrameLevelBRCForTileRow = false;
3566 m_TileRowLevelBRC = true;
3567
3568 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
3569 {
3570 for (m_tileRowPass = 0; m_tileRowPass < m_NumPassesForTileReplay; m_tileRowPass++)
3571 {
3572 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
3573 {
3574 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
3575 uint32_t slcCount, idx, sliceNumInTile = 0;
3576
3577 idx = tileRow * numTileColumns + tileCol;
3578
3579 if ((m_numPipe > 1) && (tileCol != currentPipe))
3580 {
3581 continue;
3582 }
3583
3584 MOS_LOCK_PARAMS lockFlags;
3585 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3586 lockFlags.WriteOnly = true;
3587
3588 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource), &lockFlags);
3589 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(data, release_func);
3590
3591 MOS_COMMAND_BUFFER tileBatchBuf;
3592 MOS_ZeroMemory(&tileBatchBuf, sizeof(tileBatchBuf));
3593 tileBatchBuf.pCmdBase = tileBatchBuf.pCmdPtr = (uint32_t *)data;
3594 tileBatchBuf.iRemaining = m_tileLevelBatchSize;
3595
3596 // Add batch buffer start for tile
3597 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource, 0, true, 0);
3598 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_tileLevelBatchBuffer[m_tileRowPass][idx]), release_func);
3599
3600 if (m_numPipe > 1)
3601 {
3602 //wait for last tile row BRC update completion
3603 if ((!IsFirstPipe()) && (!IsFirstPassForTileReplay()))
3604 {
3605 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHWWaitCommand(&m_resTileRowBRCsyncSemaphore, &tileBatchBuf, 0xFF), release_func);
3606 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SetSemaphoreMem(&m_resTileRowBRCsyncSemaphore, &tileBatchBuf, 0x0), release_func);
3607 }
3608 }
3609
3610 // Add conditional batch buffer end before tile row level second pass
3611 // To unify the single pipe and multiple pipe cases, add this for each tile
3612
3613 // To add the sync logic here to make sure the previous tile row BRC update is done
3614
3615 if (!IsFirstPassForTileReplay())
3616 {
3617 MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS miEnhancedConditionalBatchBufferEndParams;
3618
3619 MOS_ZeroMemory(
3620 &miEnhancedConditionalBatchBufferEndParams,
3621 sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
3622
3623 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
3624 miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer =
3625 &m_resPakMmioBuffer;
3626
3627 miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
3628 miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = true;
3629
3630 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
3631 &tileBatchBuf,
3632 (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)),
3633 release_func);
3634 }
3635
3636 // counter should be read after conditional batch buffer
3637 // in case second pass is not executed then counter should not be read
3638 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3639 {
3640 CODECHAL_ENCODE_CHK_NULL_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface(), release_func);
3641
3642 // Lazy allocation
3643 if (Mos_ResourceIsNull(&m_resHwCountTileReplay))
3644 {
3645 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3646 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3647 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3648 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3649 allocParamsForBufferLinear.Format = Format_Buffer;
3650
3651 uint32_t maxTileRow = MOS_ROUNDUP_DIVIDE(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
3652 uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
3653
3654 allocParamsForBufferLinear.dwBytes = maxTileRow*maxTileColumn*(sizeof(HwCounter));
3655 allocParamsForBufferLinear.pBufName = "HWCounter";
3656 allocParamsForBufferLinear.bIsPersistent = true;
3657 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_osInterface->pfnAllocateResource(
3658 m_osInterface,
3659 &allocParamsForBufferLinear,
3660 &m_resHwCountTileReplay),
3661 release_func);
3662 allocParamsForBufferLinear.bIsPersistent = false;
3663 }
3664
3665 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->ReadEncodeCounterFromHW(
3666 m_osInterface,
3667 &tileBatchBuf,
3668 &m_resHwCountTileReplay,
3669 (uint16_t)idx),
3670 release_func);
3671 }
3672
3673 // Construct the tile batch
3674 // To be moved to one sub function later
3675 // HCP Lock for multiple pipe mode
3676 if (m_numPipe > 1)
3677 {
3678 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3679 vdControlStateParams.scalableModePipeLock = true;
3680 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(&tileBatchBuf, &vdControlStateParams), release_func);
3681 }
3682
3683 // VDENC_PIPE_MODE_SELECT
3684 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&tileBatchBuf, pipeModeSelectParams), release_func);
3685 // HCP_PIPE_MODE_SELECT
3686 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&tileBatchBuf, pipeModeSelectParams), release_func);
3687
3688 // 3rd level batch buffer
3689 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3690 {
3691 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
3692 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource, 0, true, 0);
3693 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&tileBatchBuf, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]), release_func);
3694
3695 if (m_hevcRdoqEnabled)
3696 {
3697 MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
3698 SetHcpPicStateParams(picStateParams);
3699 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&tileBatchBuf, &picStateParams), release_func);
3700 }
3701 }
3702
3703 // HCP_TILE_CODING command
3704 // Set Tile replay related parameters
3705 tileParams[idx].IsFirstPass = IsFirstPassForTileReplay();
3706 tileParams[idx].IsLastPass = IsLastPassForTileReplay();
3707 tileParams[idx].bTileReplayEnable = m_enableTileReplay;
3708 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&tileBatchBuf, &tileParams[idx]), release_func);
3709
3710 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
3711 {
3712 bool lastSliceInTile = false, sliceInTile = false;
3713
3714 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(IsSliceInTile(slcCount,
3715 &tileParams[idx],
3716 &sliceInTile,
3717 &lastSliceInTile),
3718 release_func);
3719
3720 if (!sliceInTile)
3721 {
3722 continue;
3723 }
3724
3725 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
3726 {
3727 // save offset for next 2nd level batch buffer usage
3728 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
3729 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
3730 // m_vdencBatchBufferPerSliceVarSize: variable size for each slice
3731
3732 // starting location for executing slice level cmds
3733 // To do: Improvize to only add current slice wSlcCount
3734 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
3735
3736 for (uint32_t j = 0; j < slcCount; j++)
3737 {
3738 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset
3739 += (m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[j]);
3740 }
3741 }
3742
3743 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx);
3744
3745 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHwSliceEncodeCommand(&tileBatchBuf, &sliceState), release_func);
3746
3747 // Send VD_PIPELINE_FLUSH command for each slice
3748 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3749 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
3750 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
3751 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
3752 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3753 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3754 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileBatchBuf, &vdPipelineFlushParams), release_func);
3755
3756 sliceNumInTile++;
3757 } // end of slice
3758
3759 if (0 == sliceNumInTile)
3760 {
3761 // One tile must have at least one slice
3762 CODECHAL_ENCODE_ASSERT(false);
3763 eStatus = MOS_STATUS_INVALID_PARAMETER;
3764 break;
3765 }
3766
3767 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
3768 {
3769 CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
3770 release_func();
3771 return MOS_STATUS_INVALID_PARAMETER;
3772 }
3773
3774 //HCP unLock for multiple pipe mode
3775 if (m_numPipe > 1)
3776 {
3777 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3778 vdControlStateParams.scalableModePipeUnlock = true;
3779 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(&tileBatchBuf, &vdControlStateParams), release_func);
3780 }
3781
3782 // Send VD_PIPELINE_FLUSH command
3783 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3784 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3785 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3786 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3787 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileBatchBuf, &vdPipelineFlushParams), release_func);
3788
3789 // Send MI_FLUSH command
3790 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3791 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3792 flushDwParams.bVideoPipelineCacheInvalidate = true;
3793 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiFlushDwCmd(&tileBatchBuf, &flushDwParams), release_func);
3794
3795 // Add batch buffer end at the end of each tile batch, 2nd level batch buffer
3796 (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->iCurrent = tileBatchBuf.iOffset;
3797 (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->iRemaining = tileBatchBuf.iRemaining;
3798 (&m_tileLevelBatchBuffer[m_tileRowPass][idx])->pData = data;
3799 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &m_tileLevelBatchBuffer[m_tileRowPass][idx]), release_func);
3800
3801 if (data)
3802 {
3803 m_osInterface->pfnUnlockResource(m_osInterface, &(m_tileLevelBatchBuffer[m_tileRowPass][idx].OsResource));
3804 }
3805 } // end of row tile
3806
3807 // Set the semaphore for tile row BRC update
3808 if ((m_numPipe > 1) && (!IsFirstPipe()) && (!IsLastPassForTileReplay()))
3809 {
3810 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(
3811 SetSemaphoreMem(
3812 &m_resVdBoxSemaphoreMem[currentPipe].sResource,
3813 &cmdBuffer,
3814 0xFF),
3815 release_func);
3816 }
3817
3818 //turn on protection again in case conditionalbatchbufferexit turns off the protection
3819 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3820 {
3821 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer), release_func);
3822 }
3823
3824 // Run tile row based BRC on pipe 0
3825 if (IsFirstPipe() && (!IsLastPassForTileReplay()))
3826 {
3827 m_CurrentTileRow = tileRow;
3828 m_CurrentPassForTileReplay = m_tileRowPass;
3829 m_CurrentPassForOverAll++;
3830
3831 // Before tile row BRC update, make sure all pipes are complete
3832 if (m_numPipe > 1)
3833 {
3834 for (uint32_t i = 1; i < m_numPipe; i++)
3835 {
3836 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0xFF), release_func);
3837 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0x0), release_func);
3838 }
3839 }
3840
3841 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(HuCBrcTileRowUpdate(&cmdBuffer), release_func);
3842 }
3843
3844 //turn on protection again in case conditionalbatchbufferexit turns off the protection
3845 if (m_osInterface->osCpInterface->IsCpEnabled() && m_hwInterface->GetCpInterface()->IsHwCounterIncrement(m_osInterface) && m_enableTileReplay)
3846 {
3847 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, &cmdBuffer), release_func);
3848 }
3849
3850 //Refresh counter after every tilerowpass
3851 if (m_tileRowPass < m_NumPassesForTileReplay - 1)
3852 {
3853 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->RefreshCounter(m_osInterface, &cmdBuffer), release_func);
3854 }
3855 }
3856
3857 // Update head pointer for capture mode
3858 if (m_CaptureModeEnable && IsLastPipe())
3859 {
3860 MHW_MI_LOAD_REGISTER_IMM_PARAMS registerImmParams;
3861 MOS_ZeroMemory(®isterImmParams, sizeof(registerImmParams));
3862 registerImmParams.dwData = 1;
3863 registerImmParams.dwRegister = m_VdboxVDENCRegBase[currentPipe] + 0x90;
3864 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_miInterface->AddMiLoadRegisterImmCmd(&cmdBuffer, ®isterImmParams), release_func);
3865 }
3866
3867 //refresh encode counter after every rowpass
3868 if (tileRow < numTileRows - 1)
3869 {
3870 CODECHAL_ENCODE_CHK_STATUS_WITH_DESTROY_RETURN(m_hwInterface->GetCpInterface()->RefreshCounter(m_osInterface, &cmdBuffer), release_func);
3871 }
3872 }
3873
3874 m_vdencInterface->ReleaseMhwVdboxPipeModeSelectParams(pipeModeSelectParams);
3875
3876 // Insert end of sequence/stream if se
3877 // To be moved to slice level?
3878 if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe())
3879 {
3880 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
3881 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
3882 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
3883 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
3884 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
3885 }
3886
3887 // Send VD_CONTROL_STATE (Memory Implict Flush)
3888 MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
3889 vdControlStateParams.memoryImplicitFlush = true;
3890 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3891 static_cast<MhwMiInterfaceG12*>(m_miInterface)->AddMiVdControlStateCmd(&cmdBuffer, &vdControlStateParams));
3892
3893 // Send VD_PIPELINE_FLUSH command
3894 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
3895 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
3896 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
3897 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
3898 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
3899
3900 // Send MI_FLUSH command
3901 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
3902 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3903 flushDwParams.bVideoPipelineCacheInvalidate = true;
3904 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3905
3906 // Set the HW semaphore to indicate current pipe done
3907 if (m_numPipe > 1)
3908 {
3909 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
3910 flushDwParams.bVideoPipelineCacheInvalidate = true;
3911 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[currentPipe].sResource))
3912 {
3913 flushDwParams.pOsResource = &m_resVdBoxSemaphoreMem[currentPipe].sResource;
3914 flushDwParams.dwDataDW1 = 0xFF;
3915 }
3916 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
3917 }
3918
3919 if (IsFirstPipe())
3920 {
3921 // first pipe needs to ensure all other pipes are ready
3922 if (m_numPipe > 1)
3923 {
3924 for (uint32_t i = 0; i < m_numPipe; i++)
3925 {
3926 if (!Mos_ResourceIsNull(&m_resVdBoxSemaphoreMem[i].sResource))
3927 {
3928 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0xFF));
3929 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSemaphoreMem(&m_resVdBoxSemaphoreMem[i].sResource, &cmdBuffer, 0x0));
3930 }
3931 }
3932 }
3933
3934 // Whenever ACQP/ BRC is enabled with tiling, PAK Integration kernel is needed.
3935 // ACQP/ BRC need PAK integration kernel to aggregate statistics
3936 if (m_vdencHucUsed)
3937 {
3938 CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer));
3939 }
3940
3941 // Use HW stitch commands only in the scalable mode
3942 // For single pipe with tile replay, stitch also needed
3943 if (m_enableTileStitchByHW)
3944 {
3945 // 2nd level BB buffer for stitching cmd
3946 // current location to add cmds in 2nd level batch buffer
3947 m_HucStitchCmdBatchBuffer.iCurrent = 0;
3948 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
3949 m_HucStitchCmdBatchBuffer.dwOffset = 0;
3950 HalOcaInterface::OnSubLevelBBStart(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_HucStitchCmdBatchBuffer.OsResource, 0, true, 0);
3951 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
3952 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
3953 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
3954 }
3955
3956 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
3957
3958 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
3959
3960 if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
3961 {
3962 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
3963
3964 // BRC PAK statistics different for each pass
3965 if (m_brcEnabled)
3966 {
3967 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer));
3968 }
3969 }
3970
3971 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3972 // Signal HW semaphore for the reference frame dependency (i.e., current coding frame waits for the reference frame being ready)
3973 if (m_currRefSync && !Mos_ResourceIsNull(&m_currRefSync->resSemaphoreMem.sResource))
3974 {
3975 // the reference frame semaphore must be set in each pass because of the conditional BRC batch buffer. Some BRC passes could be skipped.
3976 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3977 storeDataParams.pOsResource = &m_currRefSync->resSemaphoreMem.sResource;
3978 storeDataParams.dwResourceOffset = 0;
3979 storeDataParams.dwValue = 1;
3980
3981 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(
3982 &cmdBuffer,
3983 &storeDataParams));
3984 }
3985 }
3986
3987 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase || (m_numPipe >= 2))
3988 {
3989 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
3990 }
3991
3992 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
3993
3994 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
3995 {
3996 bool nullRendering = m_videoContextUsesNullHw;
3997
3998 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering));
3999
4000 CODECHAL_DEBUG_TOOL(
4001 if (m_mmcState)
4002 {
4003 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
4004 }
4005 )
4006
4007 if (IsFirstPipe() &&
4008 IsLastPass() &&
4009 m_signalEnc &&
4010 m_currRefSync &&
4011 !Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
4012 {
4013 // signal semaphore
4014 MOS_SYNC_PARAMS syncParams;
4015 syncParams = g_cInitSyncParams;
4016 syncParams.GpuContext = m_videoContext;
4017 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
4018
4019 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
4020 m_currRefSync->uiSemaphoreObjCount++;
4021 m_currRefSync->bInUsed = true;
4022 }
4023 }
4024
4025 // Reset parameters for next PAK execution
4026 if (IsLastPipe() &&
4027 IsLastPass())
4028 {
4029 if (!m_singleTaskPhaseSupported)
4030 {
4031 m_osInterface->pfnResetPerfBufferID(m_osInterface);
4032 }
4033
4034 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
4035
4036 m_newPpsHeader = 0;
4037 m_newSeqHeader = 0;
4038 m_frameNum++;
4039 }
4040
4041 return eStatus;
4042 }
4043
ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)4044 MOS_STATUS CodechalVdencHevcStateG12::ConstructBatchBufferHuCBRC(PMOS_RESOURCE batchBuffer)
4045 {
4046 CODECHAL_ENCODE_FUNCTION_ENTER;
4047
4048 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4049
4050 CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData);
4051 CODECHAL_ENCODE_CHK_NULL_RETURN(batchBuffer);
4052
4053 MOS_LOCK_PARAMS lockFlags;
4054 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4055 lockFlags.WriteOnly = true;
4056
4057 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, batchBuffer, &lockFlags);
4058 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4059
4060 MOS_COMMAND_BUFFER constructedCmdBuf;
4061 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
4062 constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
4063 constructedCmdBuf.iRemaining = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
4064
4065 // 1st Group : PIPE_MODE_SELECT
4066 // set PIPE_MODE_SELECT command
4067 // This is not needed for GEN11/GEN12 as single pass SAO is supported
4068 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams;
4069 pipeModeSelectParams.Mode = m_mode;
4070 pipeModeSelectParams.bVdencEnabled = true;
4071 pipeModeSelectParams.bAdvancedRateControlEnable = true;
4072 pipeModeSelectParams.bRdoqEnable = m_hevcRdoqEnabled;
4073 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
4074 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
4075 pipeModeSelectParams.bStreamOutEnabled = 1;
4076 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&constructedCmdBuf, &pipeModeSelectParams));
4077
4078 MHW_BATCH_BUFFER TempBatchBuffer;
4079 MOS_ZeroMemory(&TempBatchBuffer, sizeof(MHW_BATCH_BUFFER));
4080 TempBatchBuffer.iSize = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
4081 TempBatchBuffer.pData = data;
4082
4083 // set MI_BATCH_BUFFER_END command
4084 int32_t cmdBufOffset = constructedCmdBuf.iOffset;
4085
4086 TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset;
4087 TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining;
4088 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
4089 constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
4090 constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent;
4091 constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining;
4092
4093 m_miBatchBufferEndCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4094 CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer1stGroupSize == constructedCmdBuf.iOffset);
4095
4096 SetAddCommands(CODECHAL_CMD1, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay);
4097 m_picStateCmdStartInBytes = constructedCmdBuf.iOffset;
4098
4099 // set HCP_PIC_STATE command
4100 MHW_VDBOX_HEVC_PIC_STATE_G12 hevcPicState;
4101 SetHcpPicStateParams(hevcPicState);
4102 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &hevcPicState));
4103 m_cmd2StartInBytes = constructedCmdBuf.iOffset;
4104
4105 SetAddCommands(CODECHAL_CMD2, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered);
4106
4107 // set MI_BATCH_BUFFER_END command
4108 TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset;
4109 TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining;
4110 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
4111 constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
4112 constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent;
4113 constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining;
4114
4115 CODECHAL_ENCODE_ASSERT(m_hwInterface->m_vdencBatchBuffer2ndGroupSize + m_hwInterface->m_vdencBatchBuffer1stGroupSize
4116 == constructedCmdBuf.iOffset);
4117
4118 // 3rd Group : HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
4119 MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState;
4120 SetHcpSliceStateCommonParams(sliceState);
4121
4122 // slice level cmds for each slice
4123 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
4124 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
4125
4126 for (uint32_t startLCU = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
4127 {
4128 bool lastSliceInTile = false, sliceInTile = false;
4129
4130 if (IsFirstPass())
4131 {
4132 slcData[slcCount].CmdOffset = startLCU * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
4133 }
4134
4135 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
4136 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
4137 uint32_t idx = 0;
4138 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
4139 {
4140 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
4141 {
4142 idx = tileRow * numTileColumns + tileCol;
4143 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
4144 &tileParams[idx],
4145 &sliceInTile,
4146 &lastSliceInTile));
4147
4148 if (sliceInTile)
4149 {
4150 break;
4151 }
4152 }
4153 if (sliceInTile)
4154 {
4155 break;
4156 }
4157 }
4158
4159 SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, tileParams, lastSliceInTile, idx);
4160
4161 m_vdencBatchBufferPerSliceVarSize[slcCount] = 0;
4162
4163 // set HCP_WEIGHTOFFSET_STATE command
4164 // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
4165 // If zero, then this command is not issued.
4166 if (m_hevcVdencWeightedPredEnabled)
4167 {
4168 MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
4169 MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
4170 // HuC based WP ignores App based weights
4171 if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
4172 {
4173 for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
4174 {
4175 // Luma, Chroma Offset
4176 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
4177 {
4178 hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)m_hevcSliceParams->luma_offset[k][i];
4179 // Cb, Cr
4180 for (auto j = 0; j < 2; j++)
4181 {
4182 hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)m_hevcSliceParams->chroma_offset[k][i][j];
4183 }
4184 }
4185
4186 // Luma Weight
4187 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4188 &hcpWeightOffsetParams.LumaWeights[k],
4189 sizeof(hcpWeightOffsetParams.LumaWeights[k]),
4190 &m_hevcSliceParams->delta_luma_weight[k],
4191 sizeof(m_hevcSliceParams->delta_luma_weight[k])));
4192 // Chroma Weight
4193 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
4194 &hcpWeightOffsetParams.ChromaWeights[k],
4195 sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
4196 &m_hevcSliceParams->delta_chroma_weight[k],
4197 sizeof(m_hevcSliceParams->delta_chroma_weight[k])));
4198 }
4199 }
4200
4201 // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
4202 if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4203 {
4204 hcpWeightOffsetParams.ucList = LIST_0;
4205
4206 cmdBufOffset = constructedCmdBuf.iOffset;
4207 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
4208 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4209 // 1st HcpWeightOffset cmd is not always inserted (except weighted prediction + P, B slices)
4210 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
4211 }
4212
4213 // 2nd HCP_WEIGHTOFFSET_STATE cmd - B only
4214 if (m_hevcSliceParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4215 {
4216 hcpWeightOffsetParams.ucList = LIST_1;
4217
4218 cmdBufOffset = constructedCmdBuf.iOffset;
4219 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(&constructedCmdBuf, nullptr, &hcpWeightOffsetParams));
4220 m_hcpWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4221 // 2nd HcpWeightOffset cmd is not always inserted (except weighted prediction + B slices)
4222 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_hcpWeightOffsetStateCmdSize;
4223 }
4224 }
4225
4226 // set HCP_SLICE_STATE command
4227 cmdBufOffset = constructedCmdBuf.iOffset;
4228 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(&constructedCmdBuf, &sliceState));
4229 m_hcpSliceStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4230
4231 // set 1st HCP_PAK_INSERT_OBJECT command
4232 // insert AU, SPS, PPS headers before first slice header
4233 if (sliceState.bInsertBeforeSliceHeaders)
4234 {
4235 uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for DwordLength field in PAK_INSERT_OBJ cmd
4236 m_1stPakInsertObjectCmdSize = 0;
4237
4238 for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
4239 {
4240 uint32_t nalUnitPosiSize = sliceState.ppNalUnitParams[i]->uiSize;
4241 uint32_t nalUnitPosiOffset = sliceState.ppNalUnitParams[i]->uiOffset;
4242
4243 while (nalUnitPosiSize > 0)
4244 {
4245 uint32_t bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalUnitPosiSize * 8);
4246 uint32_t offSet = nalUnitPosiOffset;
4247
4248 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
4249 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
4250 pakInsertObjectParams.bEmulationByteBitsInsert = sliceState.ppNalUnitParams[i]->bInsertEmulationBytes;
4251 pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.ppNalUnitParams[i]->uiSkipEmulationCheckCount;
4252 pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
4253 pakInsertObjectParams.dwBitSize = bitSize;
4254 pakInsertObjectParams.dwOffset = offSet;
4255
4256 if (nalUnitPosiSize > maxBytesInPakInsertObjCmd)
4257 {
4258 nalUnitPosiSize -= maxBytesInPakInsertObjCmd;
4259 nalUnitPosiOffset += maxBytesInPakInsertObjCmd;
4260 }
4261 else
4262 {
4263 nalUnitPosiSize = 0;
4264 }
4265
4266 cmdBufOffset = constructedCmdBuf.iOffset;
4267 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&constructedCmdBuf, &pakInsertObjectParams));
4268
4269 // this info needed again in BrcUpdate HuC FW const
4270 m_1stPakInsertObjectCmdSize += (constructedCmdBuf.iOffset - cmdBufOffset);
4271 }
4272 }
4273 // 1st PakInsertObject cmd is not always inserted for each slice
4274 m_vdencBatchBufferPerSliceVarSize[slcCount] += m_1stPakInsertObjectCmdSize;
4275 }
4276
4277 // set 2nd HCP_PAK_INSERT_OBJECT command
4278 // Insert slice header
4279 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
4280 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
4281 pakInsertObjectParams.bLastHeader = true;
4282 pakInsertObjectParams.bEmulationByteBitsInsert = true;
4283
4284 // App does the slice header packing, set the skip count passed by the app
4285 pakInsertObjectParams.uiSkipEmulationCheckCount = sliceState.uiSkipEmulationCheckCount;
4286 pakInsertObjectParams.pBsBuffer = sliceState.pBsBuffer;
4287 pakInsertObjectParams.dwBitSize = sliceState.dwLength;
4288 pakInsertObjectParams.dwOffset = sliceState.dwOffset;
4289
4290 // For HEVC VDEnc Dynamic Slice
4291 if (m_hevcSeqParams->SliceSizeControl)
4292 {
4293 pakInsertObjectParams.bLastHeader = false;
4294 pakInsertObjectParams.bEmulationByteBitsInsert = false;
4295 pakInsertObjectParams.dwBitSize = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
4296 pakInsertObjectParams.bResetBitstreamStartingPos = true;
4297 }
4298
4299 uint32_t byteSize = (pakInsertObjectParams.dwBitSize + 7) >> 3;
4300 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
4301 &constructedCmdBuf,
4302 &pakInsertObjectParams));
4303
4304 // 2nd PakInsertObject cmd is always inserted for each slice
4305 // so already reflected in dwVdencBatchBufferPerSliceConstSize
4306 m_vdencBatchBufferPerSliceVarSize[slcCount] += (MOS_ALIGN_CEIL(byteSize, sizeof(uint32_t))) / sizeof(uint32_t) * 4;
4307
4308 // set 3rd HCP_PAK_INSERT_OBJECT command
4309 if (m_hevcSeqParams->SliceSizeControl)
4310 {
4311 // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
4312 pakInsertObjectParams.bLastHeader = true;
4313 pakInsertObjectParams.dwBitSize = sliceState.dwLength - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
4314 pakInsertObjectParams.dwOffset += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8); // Skips the first 5 bytes which is Start Code + Nal Unit Header
4315 pakInsertObjectParams.bResetBitstreamStartingPos = true;
4316
4317 cmdBufOffset = constructedCmdBuf.iOffset;
4318 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
4319 &constructedCmdBuf,
4320 &pakInsertObjectParams));
4321 // 3rd PakInsertObject cmd is not always inserted for each slice
4322 m_vdencBatchBufferPerSliceVarSize[slcCount] += (constructedCmdBuf.iOffset - cmdBufOffset);
4323 }
4324
4325 // set VDENC_WEIGHT_OFFSETS_STATE command
4326 MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
4327 MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
4328 vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
4329 vdencWeightOffsetParams.isLowDelay = m_lowDelay;
4330
4331 if (vdencWeightOffsetParams.bWeightedPredEnabled)
4332 {
4333 uint8_t lumaLog2WeightDenom = m_hevcPicParams->bEnableGPUWeightedPrediction ? 6 : m_hevcSliceParams->luma_log2_weight_denom;
4334 vdencWeightOffsetParams.dwDenom = 1 << lumaLog2WeightDenom;
4335
4336 if (!m_hevcPicParams->bEnableGPUWeightedPrediction)
4337 {
4338 // Luma Offsets
4339 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
4340 {
4341 vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)m_hevcSliceParams->luma_offset[0][i];
4342 vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)m_hevcSliceParams->luma_offset[1][i];
4343 }
4344
4345 // Luma Weights
4346 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
4347 &vdencWeightOffsetParams.LumaWeights[0],
4348 sizeof(vdencWeightOffsetParams.LumaWeights[0]),
4349 &m_hevcSliceParams->delta_luma_weight[0],
4350 sizeof(m_hevcSliceParams->delta_luma_weight[0])),
4351 "Failed to copy luma weight 0 memory.");
4352
4353 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
4354 &vdencWeightOffsetParams.LumaWeights[1],
4355 sizeof(vdencWeightOffsetParams.LumaWeights[1]),
4356 &m_hevcSliceParams->delta_luma_weight[1],
4357 sizeof(m_hevcSliceParams->delta_luma_weight[1])),
4358 "Failed to copy luma weight 1 memory.");
4359 }
4360 }
4361
4362 cmdBufOffset = constructedCmdBuf.iOffset;
4363 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
4364 &constructedCmdBuf,
4365 nullptr,
4366 &vdencWeightOffsetParams));
4367 m_vdencWeightOffsetStateCmdSize = constructedCmdBuf.iOffset - cmdBufOffset;
4368
4369 // set MI_BATCH_BUFFER_END command
4370 TempBatchBuffer.iCurrent = constructedCmdBuf.iOffset;
4371 TempBatchBuffer.iRemaining = constructedCmdBuf.iRemaining;
4372 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &TempBatchBuffer));
4373 constructedCmdBuf.pCmdPtr += (TempBatchBuffer.iCurrent - constructedCmdBuf.iOffset) / 4;
4374 constructedCmdBuf.iOffset = TempBatchBuffer.iCurrent;
4375 constructedCmdBuf.iRemaining = TempBatchBuffer.iRemaining;
4376
4377 m_vdencBatchBufferPerSliceVarSize[slcCount] += ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4;
4378 for (auto i = 0; i < ENCODE_VDENC_HEVC_PADDING_DW_SIZE ; i++)
4379 {
4380 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiNoop(&constructedCmdBuf, nullptr));
4381 }
4382 startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice;
4383 }
4384
4385 if (data)
4386 {
4387 m_osInterface->pfnUnlockResource(m_osInterface, batchBuffer);
4388 }
4389
4390 return eStatus;
4391 }
4392
ConstructTLB(PMHW_BATCH_BUFFER thirdLevelBatchBuffer)4393 MOS_STATUS CodechalVdencHevcStateG12::ConstructTLB(PMHW_BATCH_BUFFER thirdLevelBatchBuffer)
4394 {
4395 CODECHAL_ENCODE_FUNCTION_ENTER;
4396
4397 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4398
4399 CODECHAL_ENCODE_CHK_NULL_RETURN(thirdLevelBatchBuffer);
4400
4401 MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams;
4402 SetHcpPicStateParams(picStateParams);
4403
4404 MOS_LOCK_PARAMS lockFlags;
4405 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4406 lockFlags.WriteOnly = true;
4407
4408 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(thirdLevelBatchBuffer->OsResource), &lockFlags);
4409 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4410
4411 MOS_COMMAND_BUFFER constructedCmdBuf;
4412 MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
4413 constructedCmdBuf.pCmdBase = constructedCmdBuf.pCmdPtr = (uint32_t *)data;
4414 constructedCmdBuf.iRemaining = m_thirdLBSize;
4415
4416 SetAddCommands(CODECHAL_CMD1, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay);
4417
4418 // HCP_PIC_STATE
4419 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(&constructedCmdBuf, &picStateParams));
4420
4421 SetAddCommands(CODECHAL_CMD2, &constructedCmdBuf, true, m_roundInterValue, m_roundIntraValue, m_lowDelay, m_refIdxMapping, m_slotForRecNotFiltered);
4422
4423 // Send HEVC_VP9_RDOQ_STATE command
4424 if (m_hevcRdoqEnabled)
4425 {
4426 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&constructedCmdBuf, &picStateParams));
4427 }
4428
4429 thirdLevelBatchBuffer->iCurrent = constructedCmdBuf.iOffset;
4430 thirdLevelBatchBuffer->iRemaining = constructedCmdBuf.iRemaining;
4431 thirdLevelBatchBuffer->pData = data;
4432 // set MI_BATCH_BUFFER_END command
4433 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, thirdLevelBatchBuffer));
4434
4435 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass()) + "]_PIPE[" + std::to_string(GetCurrentPipe()) + "]_TLB";
4436 CODECHAL_DEBUG_TOOL(
4437 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
4438 &constructedCmdBuf,
4439 CODECHAL_NUM_MEDIA_STATES,
4440 pakPassName.data()));)
4441
4442 if (data)
4443 {
4444 m_osInterface->pfnUnlockResource(m_osInterface, &(thirdLevelBatchBuffer->OsResource));
4445 }
4446
4447 return eStatus;
4448 }
4449
SetDmemHuCBrcInitReset()4450 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCBrcInitReset()
4451 {
4452 CODECHAL_ENCODE_FUNCTION_ENTER;
4453
4454 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4455
4456 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4457 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4458 lockFlagsWriteOnly.WriteOnly = true;
4459
4460 // Setup BrcInit DMEM
4461 auto hucVdencBrcInitDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12)m_osInterface->pfnLockResource(
4462 m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
4463 CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcInitDmem);
4464 MOS_ZeroMemory(hucVdencBrcInitDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12));
4465
4466 hucVdencBrcInitDmem->BRCFunc_U32 = (m_enableTileReplay ? 1 : 0) << 7; //bit0 0: Init; 1: Reset, bit7 0: frame-based; 1: tile-based
4467 hucVdencBrcInitDmem->UserMaxFrame = GetProfileLevelMaxFrameSize();
4468 hucVdencBrcInitDmem->InitBufFull_U32 = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
4469 hucVdencBrcInitDmem->BufSize_U32 = m_hevcSeqParams->VBVBufferSizeInBit;
4470 hucVdencBrcInitDmem->TargetBitrate_U32 = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; // map DDI params(in Kbits) to huc (in bits)
4471 hucVdencBrcInitDmem->MaxRate_U32 = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
4472 hucVdencBrcInitDmem->MinRate_U32 = 0;
4473 hucVdencBrcInitDmem->FrameRateM_U32 = m_hevcSeqParams->FrameRate.Numerator;
4474 hucVdencBrcInitDmem->FrameRateD_U32 = m_hevcSeqParams->FrameRate.Denominator;
4475 hucVdencBrcInitDmem->ACQP_U32 = 0;
4476 if (m_hevcSeqParams->UserMaxPBFrameSize > 0)
4477 {
4478 //Backup CodingType as need to set it as B_Tpye to get MaxFrameSize for P/B frames.
4479 auto CodingTypeTemp = m_hevcPicParams->CodingType;
4480 m_hevcPicParams->CodingType = B_TYPE;
4481 hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = GetProfileLevelMaxFrameSize();
4482 m_hevcPicParams->CodingType = CodingTypeTemp;
4483 }
4484 else
4485 {
4486 hucVdencBrcInitDmem->ProfileLevelMaxFramePB_U32 = hucVdencBrcInitDmem->UserMaxFrame;
4487 }
4488
4489 if (m_brcEnabled)
4490 {
4491 switch (m_hevcSeqParams->RateControlMethod)
4492 {
4493 case RATECONTROL_ICQ:
4494 hucVdencBrcInitDmem->BRCFlag = 0;
4495 hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;
4496 break;
4497 case RATECONTROL_CBR:
4498 hucVdencBrcInitDmem->BRCFlag = 1;
4499 break;
4500 case RATECONTROL_VBR:
4501 hucVdencBrcInitDmem->BRCFlag = 2;
4502 hucVdencBrcInitDmem->ACQP_U32 = 0;
4503 break;
4504 case RATECONTROL_VCM:
4505 hucVdencBrcInitDmem->BRCFlag = 3;
4506 break;
4507 case RATECONTROL_QVBR:
4508 hucVdencBrcInitDmem->BRCFlag = 2;
4509 hucVdencBrcInitDmem->ACQP_U32 = m_hevcSeqParams->ICQQualityFactor;;
4510 break;
4511 default:
4512 break;
4513 }
4514
4515 // Low Delay BRC
4516 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
4517 {
4518 hucVdencBrcInitDmem->BRCFlag = 5;
4519 }
4520
4521 switch (m_hevcSeqParams->MBBRC)
4522 {
4523 case mbBrcInternal:
4524 case mbBrcEnabled:
4525 hucVdencBrcInitDmem->CuQpCtrl_U8 = 3;
4526 break;
4527 case mbBrcDisabled:
4528 hucVdencBrcInitDmem->CuQpCtrl_U8 = 0;
4529 break;
4530 default:
4531 break;
4532 }
4533 }
4534 else if (m_hevcVdencAcqpEnabled)
4535 {
4536 hucVdencBrcInitDmem->BRCFlag = 0;
4537
4538 // 0=No CUQP; 1=CUQP for I-frame; 2=CUQP for P/B-frame
4539 // bit operation, bit 1 for I-frame, bit 2 for P/B frame
4540 // In VDENC mode, the field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
4541 if (m_hevcSeqParams->QpAdjustment)
4542 {
4543 hucVdencBrcInitDmem->CuQpCtrl_U8 = 3; // wPictureCodingType I:0, P:1, B:2
4544 }
4545 else
4546 {
4547 hucVdencBrcInitDmem->CuQpCtrl_U8 = 0; // wPictureCodingType I:0, P:1, B:2
4548 }
4549 }
4550
4551 hucVdencBrcInitDmem->SSCFlag = m_hevcSeqParams->SliceSizeControl;
4552
4553 hucVdencBrcInitDmem->FrameWidth_U16 = (uint16_t)m_frameWidth;
4554 hucVdencBrcInitDmem->FrameHeight_U16 = (uint16_t)m_frameHeight;
4555
4556 hucVdencBrcInitDmem->MinQP_U8 = m_hevcPicParams->BRCMinQp < 10 ? 10 : m_hevcPicParams->BRCMinQp; // Setting values from arch spec
4557 hucVdencBrcInitDmem->MaxQP_U8 = m_hevcPicParams->BRCMaxQp < 10 ? 51 : (m_hevcPicParams->BRCMaxQp > 51 ? 51 : m_hevcPicParams->BRCMaxQp); // Setting values from arch spec
4558
4559 hucVdencBrcInitDmem->BRCPyramidEnable_U8 = 0;
4560
4561 //QP modulation settings
4562 m_hevcSeqParams->GopRefDist = m_hevcSeqParams->GopRefDist == 0 ? 1 : m_hevcSeqParams->GopRefDist;
4563 bool bAllowedPyramid = m_hevcSeqParams->GopRefDist != 3;
4564 uint16_t intraPeriod = m_hevcSeqParams->GopPicSize > 4001 ? 4000 : m_hevcSeqParams->GopPicSize - 1;
4565 intraPeriod = ((intraPeriod + m_hevcSeqParams->GopRefDist - 1) / m_hevcSeqParams->GopRefDist) * m_hevcSeqParams->GopRefDist;
4566
4567 if (m_hevcSeqParams->HierarchicalFlag && bAllowedPyramid)
4568 {
4569 hucVdencBrcInitDmem->GopP_U16 = intraPeriod/m_hevcSeqParams->GopRefDist;
4570 hucVdencBrcInitDmem->GopB_U16 = hucVdencBrcInitDmem->GopP_U16;
4571 hucVdencBrcInitDmem->GopB1_U16 = ((hucVdencBrcInitDmem->GopP_U16 + hucVdencBrcInitDmem->GopB_U16) == intraPeriod) ? 0 : hucVdencBrcInitDmem->GopB_U16 * 2;
4572 hucVdencBrcInitDmem->GopB2_U16 = intraPeriod - hucVdencBrcInitDmem->GopP_U16 - hucVdencBrcInitDmem->GopB_U16 - hucVdencBrcInitDmem->GopB1_U16;
4573
4574 hucVdencBrcInitDmem->MaxBRCLevel_U8 = hucVdencBrcInitDmem->GopB1_U16 == 0 ? HEVC_BRC_FRAME_TYPE_B : (hucVdencBrcInitDmem->GopB2_U16 == 0 ? HEVC_BRC_FRAME_TYPE_B1 : HEVC_BRC_FRAME_TYPE_B2);
4575 hucVdencBrcInitDmem->BRCPyramidEnable_U8 = 1;
4576 }
4577 else //FlatB or LDB
4578 {
4579 hucVdencBrcInitDmem->GopP_U16 = intraPeriod/m_hevcSeqParams->GopRefDist;
4580 hucVdencBrcInitDmem->GopB_U16 = intraPeriod - hucVdencBrcInitDmem->GopP_U16;
4581 hucVdencBrcInitDmem->MaxBRCLevel_U8 = hucVdencBrcInitDmem->GopB_U16 == 0? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
4582 }
4583
4584 hucVdencBrcInitDmem->LumaBitDepth_U8 = m_hevcSeqParams->bit_depth_luma_minus8 + 8;
4585 hucVdencBrcInitDmem->ChromaBitDepth_U8 = m_hevcSeqParams->bit_depth_chroma_minus8 + 8;
4586
4587 if (m_hevcSeqParams->SourceBitDepth == ENCODE_HEVC_BIT_DEPTH_10)
4588 {
4589 hucVdencBrcInitDmem->LumaBitDepth_U8 = 10;
4590 hucVdencBrcInitDmem->ChromaBitDepth_U8 = 10;
4591 }
4592
4593 if ((hucVdencBrcInitDmem->LowDelayMode_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)))
4594 {
4595 MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshPB0_S8, 8 * sizeof(int8_t), (void *)m_lowdelayDevThreshPB, 8 * sizeof(int8_t));
4596 MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshVBR0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshVBR, 8 * sizeof(int8_t));
4597 MOS_SecureMemcpy(hucVdencBrcInitDmem->DevThreshI0_S8, 8 * sizeof(int8_t), (void*)m_lowdelayDevThreshI, 8 * sizeof(int8_t));
4598 }
4599 else
4600 {
4601 uint64_t inputbitsperframe = uint64_t(hucVdencBrcInitDmem->MaxRate_U32*100. / (hucVdencBrcInitDmem->FrameRateM_U32 * 100.0 / hucVdencBrcInitDmem->FrameRateD_U32));
4602 if (m_brcEnabled && !hucVdencBrcInitDmem->BufSize_U32)
4603 {
4604 CODECHAL_ENCODE_ASSERTMESSAGE("VBV BufSize should not be 0 for BRC case\n");
4605 eStatus = MOS_STATUS_INVALID_PARAMETER;
4606 }
4607 uint64_t vbvsz = hucVdencBrcInitDmem->BufSize_U32;
4608 double bps_ratio = inputbitsperframe / (vbvsz / m_devStdFPS);
4609 if (bps_ratio < m_bpsRatioLow) bps_ratio = m_bpsRatioLow;
4610 if (bps_ratio > m_bpsRatioHigh) bps_ratio = m_bpsRatioHigh;
4611
4612 for (int i = 0; i < m_numDevThreshlds / 2; i++) {
4613 hucVdencBrcInitDmem->DevThreshPB0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshPBFPNEG[i], bps_ratio));
4614 hucVdencBrcInitDmem->DevThreshPB0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshPBFPPOS[i], bps_ratio));
4615
4616 hucVdencBrcInitDmem->DevThreshI0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshIFPNEG[i], bps_ratio));
4617 hucVdencBrcInitDmem->DevThreshI0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_postMultPB*pow(m_devThreshIFPPOS[i], bps_ratio));
4618
4619 hucVdencBrcInitDmem->DevThreshVBR0_S8[i] = (signed char)(m_negMultPB*pow(m_devThreshVBRNEG[i], bps_ratio));
4620 hucVdencBrcInitDmem->DevThreshVBR0_S8[i + m_numDevThreshlds / 2] = (signed char)(m_posMultVBR*pow(m_devThreshVBRPOS[i], bps_ratio));
4621 }
4622 }
4623
4624 MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshP0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshP0, 4 * sizeof(int8_t));
4625 MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshB0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshB0, 4 * sizeof(int8_t));
4626 MOS_SecureMemcpy(hucVdencBrcInitDmem->InstRateThreshI0_S8, 4 * sizeof(int8_t), (void *)m_instRateThreshI0, 4 * sizeof(int8_t));
4627
4628 if (m_brcEnabled)
4629 {
4630 // initQPIP, initQPB values will be used for BRC in the future
4631 int32_t initQPIP = 0, initQPB = 0;
4632 ComputeVDEncInitQP(initQPIP, initQPB);
4633 hucVdencBrcInitDmem->InitQPIP_U8 = (uint8_t)initQPIP;
4634 hucVdencBrcInitDmem->InitQPB_U8 = (uint8_t)initQPB;
4635 }
4636 else
4637 {
4638 hucVdencBrcInitDmem->InitQPIP_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4639 hucVdencBrcInitDmem->InitQPB_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4640 }
4641
4642 hucVdencBrcInitDmem->TopFrmSzThrForAdapt2Pass_U8 = 32;
4643 hucVdencBrcInitDmem->BotFrmSzThrForAdapt2Pass_U8 = 24;
4644
4645 MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshP0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshP0, 7 * sizeof(uint8_t));
4646 MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshB0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshB0, 7 * sizeof(uint8_t));
4647 MOS_SecureMemcpy(hucVdencBrcInitDmem->EstRateThreshI0_U8, 7 * sizeof(uint8_t), (void*)m_estRateThreshI0, 7 * sizeof(uint8_t));
4648
4649 if (m_vdencStreamInEnabled && m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled)
4650 {
4651 hucVdencBrcInitDmem->StreamInROIEnable_U8 = 1;
4652 hucVdencBrcInitDmem->StreamInSurfaceEnable_U8 = 1;
4653 }
4654
4655 hucVdencBrcInitDmem->TopQPDeltaThrForAdapt2Pass_U8 = 2;
4656 hucVdencBrcInitDmem->BotQPDeltaThrForAdapt2Pass_U8 = 1;
4657
4658 if ((m_hevcSeqParams->SlidingWindowSize != 0) && (m_hevcSeqParams->MaxBitRatePerSlidingWindow != 0))
4659 {
4660 hucVdencBrcInitDmem->SlidingWindow_Size_U32 = m_hevcSeqParams->SlidingWindowSize;
4661 hucVdencBrcInitDmem->SLIDINGWINDOW_MaxRateRatio = m_hevcSeqParams->MaxBitRatePerSlidingWindow * 100 / m_hevcSeqParams->TargetBitRate;
4662 }
4663 else
4664 {
4665 if (m_hevcSeqParams->FrameRate.Denominator == 0)
4666 {
4667 CODECHAL_ENCODE_ASSERTMESSAGE("FrameRate.Deminator is zero!");
4668 return MOS_STATUS_INVALID_PARAMETER;
4669 }
4670 uint32_t framerate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
4671 hucVdencBrcInitDmem->SlidingWindow_Size_U32 = MOS_MIN(framerate, 60);
4672 hucVdencBrcInitDmem->SLIDINGWINDOW_MaxRateRatio = 120;
4673 }
4674
4675 // Tile Row based BRC
4676 if (m_enableTileReplay)
4677 {
4678 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
4679 uint32_t residual = (1 << shift) - 1;
4680 hucVdencBrcInitDmem->SlideWindowRC = 0; //Reserved for now
4681 hucVdencBrcInitDmem->MaxLogCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4682 hucVdencBrcInitDmem->FrameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
4683 hucVdencBrcInitDmem->FrameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
4684 }
4685
4686 // Long term reference
4687 hucVdencBrcInitDmem->LongTermRefEnable_U8 = true;
4688 hucVdencBrcInitDmem->LongTermRefMsdk_U8 = true;
4689 hucVdencBrcInitDmem->IsLowDelay_U8 = m_lowDelay;
4690
4691 hucVdencBrcInitDmem->LookaheadDepth_U8 = m_lookaheadDepth;
4692
4693 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx]);
4694
4695 return eStatus;
4696 }
4697
SetConstDataHuCBrcUpdate()4698 MOS_STATUS CodechalVdencHevcStateG12::SetConstDataHuCBrcUpdate()
4699 {
4700 CODECHAL_ENCODE_FUNCTION_ENTER;
4701
4702 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4703
4704 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4705 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4706 lockFlagsWriteOnly.WriteOnly = true;
4707
4708 auto hucConstData = (PCODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G12)m_osInterface->pfnLockResource(
4709 m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx], &lockFlagsWriteOnly);
4710 CODECHAL_ENCODE_CHK_NULL_RETURN(hucConstData);
4711
4712 MOS_SecureMemcpy(hucConstData->SLCSZ_THRDELTAI_U16, sizeof(m_hucConstantData), m_hucConstantData, sizeof(m_hucConstantData));
4713
4714 MOS_SecureMemcpy(hucConstData->RDQPLambdaI, sizeof(m_rdQpLambdaI), m_rdQpLambdaI, sizeof(m_rdQpLambdaI));
4715 MOS_SecureMemcpy(hucConstData->RDQPLambdaP, sizeof(m_rdQpLambdaP), m_rdQpLambdaP, sizeof(m_rdQpLambdaP));
4716
4717 if (m_hevcVisualQualityImprovement)
4718 {
4719 MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI_VQI, sizeof(m_sadQpLambdaI_VQI));
4720 MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode_VQI, sizeof(m_penaltyForIntraNonDC32x32PredMode_VQI));
4721 }
4722 else
4723 {
4724 MOS_SecureMemcpy(hucConstData->SADQPLambdaI, sizeof(m_sadQpLambdaI), m_sadQpLambdaI, sizeof(m_sadQpLambdaI));
4725 MOS_SecureMemcpy(hucConstData->PenaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode), m_penaltyForIntraNonDC32x32PredMode, sizeof(m_penaltyForIntraNonDC32x32PredMode));
4726 }
4727
4728 MOS_SecureMemcpy(hucConstData->SADQPLambdaP, sizeof(m_sadQpLambdaP), m_sadQpLambdaP, sizeof(m_sadQpLambdaP));
4729
4730 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)
4731 {
4732 const int numEstrateThreshlds = 7;
4733
4734 for (int i = 0; i < numEstrateThreshlds + 1; i++)
4735 {
4736 for (int j = 0; j < m_numDevThreshlds + 1; j++)
4737 {
4738 hucConstData->FrmSzAdjTabI_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszI[j][i];
4739 hucConstData->FrmSzAdjTabP_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszP[j][i];
4740 hucConstData->FrmSzAdjTabB_S8[(numEstrateThreshlds + 1)*j + i] = m_lowdelayDeltaFrmszB[j][i];
4741 }
4742 }
4743 }
4744
4745 // ModeCosts depends on frame type
4746 if (m_pictureCodingType == I_TYPE)
4747 {
4748 MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsIFrame), m_hucModeCostsIFrame, sizeof(m_hucModeCostsIFrame));
4749 }
4750 else
4751 {
4752 MOS_SecureMemcpy(hucConstData->ModeCosts, sizeof(m_hucModeCostsPbFrame), m_hucModeCostsPbFrame, sizeof(m_hucModeCostsPbFrame));
4753 }
4754
4755 // starting location in batch buffer for each slice
4756 uint32_t baseLocation = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
4757 uint32_t currentLocation = baseLocation;
4758
4759 auto slcData = m_slcData;
4760 // HCP_WEIGHTSOFFSETS_STATE + HCP_SLICE_STATE + HCP_PAK_INSERT_OBJECT + VDENC_WEIGHT_OFFSETS_STATE
4761 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4762 {
4763 auto hevcSlcParams = &m_hevcSliceParams[slcCount];
4764 // HuC FW require unit in Bytes
4765 hucConstData->Slice[slcCount].SizeOfCMDs
4766 = (uint16_t)(m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount]);
4767
4768 // HCP_WEIGHTOFFSET_STATE cmd
4769 if (m_hevcVdencWeightedPredEnabled)
4770 {
4771 // 1st HCP_WEIGHTOFFSET_STATE cmd - P & B
4772 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4773 {
4774 hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L0 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
4775 currentLocation += m_hcpWeightOffsetStateCmdSize;
4776 }
4777
4778 // 2nd HCP_WEIGHTOFFSET_STATE cmd - B
4779 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
4780 {
4781 hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET_L1 starts in byte from beginning of the SLB. 0xFFFF means unavailable in SLB
4782 currentLocation += m_hcpWeightOffsetStateCmdSize;
4783 }
4784 }
4785 else
4786 {
4787 // 0xFFFF means unavailable in SLB
4788 hucConstData->Slice[slcCount].HcpWeightOffsetL0_StartInBytes = 0xFFFF;
4789 hucConstData->Slice[slcCount].HcpWeightOffsetL1_StartInBytes = 0xFFFF;
4790 }
4791
4792 // HCP_SLICE_STATE cmd
4793 hucConstData->Slice[slcCount].SliceState_StartInBytes = (uint16_t)currentLocation; // HCP_WEIGHTOFFSET is not needed
4794 currentLocation += m_hcpSliceStateCmdSize;
4795
4796 // VDENC_WEIGHT_OFFSETS_STATE cmd
4797 hucConstData->Slice[slcCount].VdencWeightOffset_StartInBytes // VdencWeightOffset cmd is the last one expect BatchBufferEnd cmd
4798 = (uint16_t)(baseLocation + hucConstData->Slice[slcCount].SizeOfCMDs - m_vdencWeightOffsetStateCmdSize - m_miBatchBufferEndCmdSize - ENCODE_VDENC_HEVC_PADDING_DW_SIZE * 4);
4799
4800 // logic from PakInsertObject cmd
4801 uint32_t bitSize = (m_hevcSeqParams->SliceSizeControl) ? (hevcSlcParams->BitLengthSliceHeaderStartingPortion) : slcData[slcCount].BitSize; // 40 for HEVC VDEnc Dynamic Slice
4802 uint32_t byteSize = (bitSize + 7) >> 3;
4803 uint32_t sliceHeaderSizeInBytes = (bitSize + 7) >> 3;
4804 // 1st PakInsertObject cmd with AU, SPS, PPS headers only exists for the first slice
4805 if (slcCount == 0)
4806 {
4807 // assumes that there is no 3rd PakInsertObject cmd for SSC
4808 currentLocation += m_1stPakInsertObjectCmdSize;
4809 }
4810
4811 hucConstData->Slice[slcCount].SliceHeaderPIO_StartInBytes = (uint16_t)currentLocation;
4812
4813 // HuC FW requires true slice header size in bits without byte alignment
4814 hucConstData->Slice[slcCount].SliceHeader_SizeInBits = (uint16_t)(sliceHeaderSizeInBytes * 8);
4815 if (!IsFirstPass())
4816 {
4817 PBSBuffer bsBuffer = &m_bsBuffer;
4818 CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer);
4819 CODECHAL_ENCODE_CHK_NULL_RETURN(bsBuffer->pBase);
4820 uint8_t *sliceHeaderLastByte = (uint8_t*)(bsBuffer->pBase + slcData[slcCount].SliceOffset + sliceHeaderSizeInBytes - 1);
4821 for (auto i = 0; i < 8; i++)
4822 {
4823 uint8_t mask = 1 << i;
4824 if (*sliceHeaderLastByte & mask)
4825 {
4826 hucConstData->Slice[slcCount].SliceHeader_SizeInBits -= (i + 1);
4827 break;
4828 }
4829 }
4830 }
4831
4832 if (m_hevcVdencWeightedPredEnabled)
4833 {
4834 hucConstData->Slice[slcCount].WeightTable_StartInBits = (uint16_t)hevcSlcParams->PredWeightTableBitOffset;
4835 hucConstData->Slice[slcCount].WeightTable_EndInBits = (uint16_t)(hevcSlcParams->PredWeightTableBitOffset + (hevcSlcParams->PredWeightTableBitLength));
4836 }
4837 else
4838 {
4839 // number of bits from beginning of slice header, 0xffff means not awailable
4840 hucConstData->Slice[slcCount].WeightTable_StartInBits = 0xFFFF;
4841 hucConstData->Slice[slcCount].WeightTable_EndInBits = 0xFFFF;
4842 }
4843
4844 baseLocation += hucConstData->Slice[slcCount].SizeOfCMDs;
4845 currentLocation = baseLocation;
4846 }
4847
4848 if (m_lookaheadDepth > 0)
4849 {
4850 hucConstData->UPD_LA_TargetFulness_U32 = m_targetBufferFulness;
4851
4852 uint8_t QpStrength = (uint8_t)(m_hevcPicParams->QpModulationStrength + (m_hevcPicParams->QpModulationStrength >> 1));
4853 if (!m_initDeltaQP)
4854 {
4855 hucConstData->UPD_deltaQP = (m_prevQpModulationStrength + QpStrength + 1) >> 1;
4856 }
4857 else
4858 {
4859 hucConstData->UPD_deltaQP = QpStrength;
4860
4861 if (IsLastPass())
4862 {
4863 m_initDeltaQP = false;
4864 }
4865 }
4866
4867 m_prevQpModulationStrength = hucConstData->UPD_deltaQP;
4868 }
4869
4870 hucConstData->UPD_TR_TargetSize_U32 = m_hevcPicParams->TargetFrameSize << 3;// byte to bit
4871 hucConstData->UPD_TCBRC_SCENARIO_U8 = m_tcbrcQualityBoost;
4872
4873 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]);
4874
4875 return eStatus;
4876 }
4877
SetDmemHuCBrcUpdate()4878 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCBrcUpdate()
4879 {
4880 CODECHAL_ENCODE_FUNCTION_ENTER;
4881
4882 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4883
4884 MOS_LOCK_PARAMS lockFlagsWriteOnly;
4885 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
4886 lockFlagsWriteOnly.WriteOnly = true;
4887 uint32_t currentPass = m_enableTileReplay ? m_CurrentPassForOverAll : GetCurrentPass();
4888
4889 // Program update DMEM
4890 auto hucVdencBrcUpdateDmem = (PCODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12)m_osInterface->pfnLockResource(
4891 m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
4892 CODECHAL_ENCODE_CHK_NULL_RETURN(hucVdencBrcUpdateDmem);
4893 MOS_ZeroMemory(hucVdencBrcUpdateDmem, sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12));
4894
4895 hucVdencBrcUpdateDmem->TARGETSIZE_U32 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)? m_hevcSeqParams->InitVBVBufferFullnessInBit :
4896 MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
4897 hucVdencBrcUpdateDmem->FrameID_U32 = m_storeData; // frame number
4898 MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjFrame_U16, 4 * sizeof(uint16_t), (void*)m_startGAdjFrame, 4 * sizeof(uint16_t));
4899 hucVdencBrcUpdateDmem->TargetSliceSize_U16 = (uint16_t)m_hevcPicParams->MaxSliceSizeInBytes;
4900 auto slbSliceSize = (m_hwInterface->m_vdenc2ndLevelBatchBufferSize - m_hwInterface->m_vdencBatchBuffer1stGroupSize -
4901 m_hwInterface->m_vdencBatchBuffer2ndGroupSize) / ENCODE_HEVC_VDENC_NUM_MAX_SLICES;
4902 hucVdencBrcUpdateDmem->SLB_Data_SizeInBytes = (uint16_t)(slbSliceSize * m_numSlices +
4903 m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
4904 hucVdencBrcUpdateDmem->PIPE_MODE_SELECT_StartInBytes = 0xFFFF; // HuC need not need to modify the pipe mode select command in Gen11+
4905 hucVdencBrcUpdateDmem->CMD1_StartInBytes = (uint16_t)m_hwInterface->m_vdencBatchBuffer1stGroupSize;
4906 hucVdencBrcUpdateDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes;
4907 hucVdencBrcUpdateDmem->CMD2_StartInBytes = (uint16_t)m_cmd2StartInBytes;
4908
4909 if (m_prevStoreData != m_storeData)
4910 {
4911 m_prevStoreData = m_storeData;
4912
4913 int32_t oldestIdx = -1;
4914 int32_t selectedSlot = -1;
4915 uint32_t oldestAge = 0;
4916 for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++)
4917 {
4918 if (slotInfo[i].isUsed == true && slotInfo[i].isRef)
4919 {
4920 slotInfo[i].age++;
4921 if (slotInfo[i].age >= oldestAge)
4922 {
4923 oldestAge = slotInfo[i].age;
4924 oldestIdx = i;
4925 }
4926 }
4927 if ((selectedSlot == -1) && (slotInfo[i].isUsed == false || !slotInfo[i].isRef))
4928 {
4929 selectedSlot = i;
4930 }
4931 }
4932
4933 if (selectedSlot == -1)
4934 {
4935 selectedSlot = oldestIdx;
4936 }
4937
4938 if (selectedSlot == -1)
4939 {
4940 CODECHAL_ENCODE_ASSERTMESSAGE("No valid ref slot index");
4941 return MOS_STATUS_INVALID_PARAMETER;
4942 }
4943
4944 slotInfo[selectedSlot].age = 0;
4945 slotInfo[selectedSlot].poc = m_hevcPicParams->CurrPicOrderCnt;
4946 slotInfo[selectedSlot].isUsed = true;
4947 slotInfo[selectedSlot].isRef = m_hevcPicParams->bUsedAsRef;
4948
4949 m_curPicSlot = selectedSlot;
4950 }
4951
4952 hucVdencBrcUpdateDmem->Current_Data_Offset = m_curPicSlot * m_weightHistSize;
4953
4954 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
4955 {
4956 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx];
4957 auto refPOC = m_hevcPicParams->RefFramePOCList[refPic.FrameIdx];
4958 for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++)
4959 {
4960 if (slotInfo[i].poc == refPOC)
4961 {
4962 hucVdencBrcUpdateDmem->Ref_Data_Offset[refIdx] = i * m_weightHistSize;
4963 break;
4964 }
4965 }
4966 }
4967
4968 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
4969 {
4970 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx];
4971 auto refPOC = m_hevcPicParams->RefFramePOCList[refPic.FrameIdx];
4972 for (int i = 0; i < CODECHAL_ENCODE_HEVC_VDENC_WP_DATA_BLOCK_NUMBER; i++)
4973 {
4974 if (slotInfo[i].poc == refPOC)
4975 {
4976 hucVdencBrcUpdateDmem->Ref_Data_Offset[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] = i * m_weightHistSize;
4977 break;
4978 }
4979 }
4980 }
4981
4982 hucVdencBrcUpdateDmem->MaxNumSliceAllowed_U16 = (uint16_t)GetMaxAllowedSlices(m_hevcSeqParams->Level);
4983
4984 if (m_FrameLevelBRCForTileRow)
4985 {
4986 hucVdencBrcUpdateDmem->OpMode_U8 = 0x4;
4987 }
4988 else if (m_TileRowLevelBRC)
4989 {
4990 hucVdencBrcUpdateDmem->OpMode_U8 = 0x8;
4991 }
4992 else
4993 {
4994 hucVdencBrcUpdateDmem->OpMode_U8 // 1: BRC (including ACQP), 2: Weighted prediction (should not be enabled in first pass)
4995 = (m_hevcVdencWeightedPredEnabled && m_hevcPicParams->bEnableGPUWeightedPrediction && !IsFirstPass()) ? 3 : 1; // 01: BRC, 10: WP never used, 11: BRC + WP
4996 }
4997
4998 bool bAllowedPyramid = m_hevcSeqParams->GopRefDist != 3;
4999
5000 if (m_pictureCodingType == I_TYPE)
5001 {
5002 hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_I;
5003 }
5004 else if (m_hevcSeqParams->HierarchicalFlag && bAllowedPyramid)
5005 {
5006 if (m_hevcPicParams->HierarchLevelPlus1 > 0)
5007 {
5008 std::map<int, HEVC_BRC_FRAME_TYPE> hierchLevelPlus1_to_brclevel{
5009 {1, HEVC_BRC_FRAME_TYPE_P_OR_LB},
5010 {2, HEVC_BRC_FRAME_TYPE_B},
5011 {3, HEVC_BRC_FRAME_TYPE_B1},
5012 {4, HEVC_BRC_FRAME_TYPE_B2}};
5013 hucVdencBrcUpdateDmem->CurrentFrameType_U8 = hierchLevelPlus1_to_brclevel.count(m_hevcPicParams->HierarchLevelPlus1) ? hierchLevelPlus1_to_brclevel[m_hevcPicParams->HierarchLevelPlus1] : HEVC_BRC_FRAME_TYPE_INVALID;
5014 //Invalid HierarchLevelPlus1 or LBD frames at level 3 eror check.
5015 if ((hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_INVALID) ||
5016 (m_hevcSeqParams->LowDelayMode && hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_B2))
5017 {
5018 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC_BRC_FRAME_TYPE_INVALID or LBD picture doesn't support Level 4\n");
5019 return MOS_STATUS_INVALID_PARAMETER;
5020 }
5021 }
5022 else if(!m_hevcSeqParams->LowDelayMode) //RA
5023 {
5024 //if L0/L1 both points to previous frame, then its LBD otherwise its is level 1 RA B.
5025 auto B_or_LDB_brclevel = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
5026 std::map<int, HEVC_BRC_FRAME_TYPE> codingtype_to_brclevel{
5027 {P_TYPE, HEVC_BRC_FRAME_TYPE_P_OR_LB},
5028 {B_TYPE, B_or_LDB_brclevel},
5029 {B1_TYPE, HEVC_BRC_FRAME_TYPE_B1},
5030 {B2_TYPE, HEVC_BRC_FRAME_TYPE_B2}};
5031 hucVdencBrcUpdateDmem->CurrentFrameType_U8 = codingtype_to_brclevel.count(m_pictureCodingType) ? codingtype_to_brclevel[m_pictureCodingType] : HEVC_BRC_FRAME_TYPE_INVALID;
5032 //Invalid CodingType.
5033 if (hucVdencBrcUpdateDmem->CurrentFrameType_U8 == HEVC_BRC_FRAME_TYPE_INVALID)
5034 {
5035 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid CodingType\n");
5036 return MOS_STATUS_INVALID_PARAMETER;
5037 }
5038 }
5039 else //LDB
5040 {
5041 hucVdencBrcUpdateDmem->CurrentFrameType_U8 = HEVC_BRC_FRAME_TYPE_P_OR_LB; //No Hierarchical info for LDB, treated as flat case
5042 }
5043 }
5044 else // FlatB or LDB
5045 {
5046 hucVdencBrcUpdateDmem->CurrentFrameType_U8 = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
5047 }
5048
5049 // Num_Ref_L1 should be always same as Num_Ref_L0
5050 hucVdencBrcUpdateDmem->Num_Ref_L0_U8 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
5051 hucVdencBrcUpdateDmem->Num_Ref_L1_U8 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;
5052 hucVdencBrcUpdateDmem->Num_Slices = (uint8_t)m_hevcPicParams->NumSlices;
5053
5054 // CQP_QPValue_U8 setting is needed since ACQP is also part of ICQ
5055 hucVdencBrcUpdateDmem->CQP_QPValue_U8 = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
5056 hucVdencBrcUpdateDmem->CQP_FracQP_U8 = 0;
5057 if (m_hevcPicParams->BRCPrecision == 1)
5058 {
5059 hucVdencBrcUpdateDmem->MaxNumPass_U8 = 1;
5060 }
5061 else
5062 {
5063 hucVdencBrcUpdateDmem->MaxNumPass_U8 = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
5064 }
5065
5066 MOS_SecureMemcpy(hucVdencBrcUpdateDmem->gRateRatioThreshold_U8, 7 * sizeof(uint8_t), (void*)m_rateRatioThreshold, 7 * sizeof(uint8_t));
5067 MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjMult_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjMult, 5 * sizeof(uint8_t));
5068 MOS_SecureMemcpy(hucVdencBrcUpdateDmem->startGAdjDiv_U8, 5 * sizeof(uint8_t), (void*)m_startGAdjDiv, 5 * sizeof(uint8_t));
5069 MOS_SecureMemcpy(hucVdencBrcUpdateDmem->gRateRatioThresholdQP_U8, 8 * sizeof(uint8_t), (void*)m_rateRatioThresholdQP, 8 * sizeof(uint8_t));
5070
5071 hucVdencBrcUpdateDmem->IPAverageCoeff_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) ? 0 : 64;
5072 hucVdencBrcUpdateDmem->CurrentPass_U8 = (uint8_t)currentPass;
5073
5074 if ((m_hevcVdencAcqpEnabled && m_hevcSeqParams->QpAdjustment) || (m_brcEnabled && (m_hevcSeqParams->MBBRC != 2)))
5075 {
5076 hucVdencBrcUpdateDmem->DeltaQPForSadZone0_S8 = -1;
5077 hucVdencBrcUpdateDmem->DeltaQPForSadZone1_S8 = 0;
5078 hucVdencBrcUpdateDmem->DeltaQPForSadZone2_S8 = 1;
5079 hucVdencBrcUpdateDmem->DeltaQPForSadZone3_S8 = 2;
5080 hucVdencBrcUpdateDmem->DeltaQPForMvZero_S8 = 3;
5081 hucVdencBrcUpdateDmem->DeltaQPForMvZone0_S8 = -2;
5082 hucVdencBrcUpdateDmem->DeltaQPForMvZone1_S8 = 0;
5083 hucVdencBrcUpdateDmem->DeltaQPForMvZone2_S8 = 2;
5084 }
5085
5086 if (m_hevcVdencWeightedPredEnabled)
5087 {
5088 hucVdencBrcUpdateDmem->LumaLog2WeightDenom_S8 = 6;
5089 hucVdencBrcUpdateDmem->ChromaLog2WeightDenom_S8 = 6;
5090 }
5091
5092 // chroma weights are not confirmed to be supported from HW team yet
5093 hucVdencBrcUpdateDmem->DisabledFeature_U8 = 0; // bit mask, 1 (bit0): disable chroma weight setting
5094
5095 hucVdencBrcUpdateDmem->SlidingWindow_Enable_U8 = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
5096 hucVdencBrcUpdateDmem->LOG_LCU_Size_U8 = 6;
5097 hucVdencBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8 = 4;
5098 hucVdencBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8 = -5;
5099 hucVdencBrcUpdateDmem->SceneChgPrevIntraPctThreshold_U8 = 96;
5100 hucVdencBrcUpdateDmem->SceneChgCurIntraPctThreshold_U8 = 192;
5101
5102 // SCC is in conflict with PAK only pass
5103 if (m_enableSCC)
5104 {
5105 hucVdencBrcUpdateDmem->ReEncodePositiveQPDeltaThr_S8 = 0;
5106 hucVdencBrcUpdateDmem->ReEncodeNegativeQPDeltaThr_S8 = 0;
5107 }
5108
5109 // reset skip frame statistics
5110 m_numSkipFrames = 0;
5111 m_sizeSkipFrames = 0;
5112
5113 // For tile row based BRC
5114 if (m_TileRowLevelBRC)
5115 {
5116 hucVdencBrcUpdateDmem->MaxNumTileHuCCallMinus1 = m_hevcPicParams->num_tile_rows_minus1;
5117 hucVdencBrcUpdateDmem->TileHucCallIndex = (uint8_t)m_CurrentTileRow;
5118 hucVdencBrcUpdateDmem->TileHuCCallPassIndex = m_CurrentPassForTileReplay + 1;
5119 hucVdencBrcUpdateDmem->TileHuCCallPassMax = m_NumPassesForTileReplay;
5120
5121 // Need change App to pass real max bit rate rather than to enlarge it with 1000
5122 if (m_hevcSeqParams->FrameRate.Numerator)
5123 {
5124 hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS *
5125 m_hevcSeqParams->FrameRate.Denominator + (m_hevcSeqParams->FrameRate.Numerator >> 1)) /
5126 m_hevcSeqParams->FrameRate.Numerator);
5127 }
5128 else
5129 {
5130 hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS + 15) / 30);
5131 }
5132
5133 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
5134 uint32_t startIdx = m_CurrentTileRow * numTileColumns;
5135 uint32_t endIdx = startIdx + numTileColumns - 1;
5136 uint32_t LCUsInTile = 0;
5137
5138 for (uint32_t idx = 0; idx < numTileColumns; idx ++)
5139 {
5140 LCUsInTile += m_hevcPicParams->tile_row_height[m_CurrentTileRow] * m_hevcPicParams->tile_column_width[idx];
5141 }
5142
5143 hucVdencBrcUpdateDmem->StartTileIdx = (uint8_t)startIdx;
5144 hucVdencBrcUpdateDmem->EndTileIdx = (uint8_t)endIdx;
5145 hucVdencBrcUpdateDmem->TileSizeInLCU = (uint16_t)LCUsInTile;
5146 }
5147 else if (m_FrameLevelBRCForTileRow)
5148 {
5149 hucVdencBrcUpdateDmem->MaxNumTileHuCCallMinus1 = m_hevcPicParams->num_tile_rows_minus1;
5150 hucVdencBrcUpdateDmem->TileHucCallIndex = 0;
5151 hucVdencBrcUpdateDmem->TileHuCCallPassIndex = 0;
5152 hucVdencBrcUpdateDmem->TileHuCCallPassMax = m_NumPassesForTileReplay;
5153
5154 // Need change App to pass real max bit rate rather than to enlarge it with 1000
5155 if (m_hevcSeqParams->FrameRate.Numerator)
5156 {
5157 hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS *
5158 m_hevcSeqParams->FrameRate.Denominator + (m_hevcSeqParams->FrameRate.Numerator >> 1)) /
5159 m_hevcSeqParams->FrameRate.Numerator);
5160 }
5161 else
5162 {
5163 hucVdencBrcUpdateDmem->TxSizeInBitsPerFrame = (uint32_t)(((uint32_t)m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS + 15) / 30);
5164 }
5165 }
5166
5167 // Long term reference
5168 hucVdencBrcUpdateDmem->IsLongTermRef = CodecHal_PictureIsLongTermRef(m_currReconstructedPic);
5169 hucVdencBrcUpdateDmem->UPD_CQMEnabled_U8 = m_hevcSeqParams->scaling_list_enable_flag || m_hevcPicParams->scaling_list_data_present_flag;
5170
5171 if (m_lookaheadDepth > 0)
5172 {
5173 hucVdencBrcUpdateDmem->EnableLookAhead = 1;
5174 }
5175 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
5176
5177 return eStatus;
5178 }
5179
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)5180 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
5181 {
5182 CODECHAL_ENCODE_FUNCTION_ENTER;
5183
5184 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5185
5186 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams));
5187
5188 // With multiple tiles, ensure that HuC BRC kernel is fed with vdenc frame level statistics from HuC PAK Int kernel
5189 // Applicable for scalable/ non-scalable mode
5190 if (m_hevcPicParams->tiles_enabled_flag)
5191 {
5192 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
5193 virtualAddrParams->regionParams[1].dwOffset = m_hevcFrameStatsOffset.uiVdencStatistics;
5194 }
5195
5196 if (m_numPipe > 1)
5197 {
5198 virtualAddrParams->regionParams[2].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 2 PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
5199 virtualAddrParams->regionParams[2].dwOffset = m_hevcFrameStatsOffset.uiHevcPakStatistics;
5200 virtualAddrParams->regionParams[7].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 7 Slice Stat Streamout (Input)
5201 virtualAddrParams->regionParams[7].dwOffset = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
5202 // In scalable-mode, use PAK Integration kernel output to get bistream size
5203 virtualAddrParams->regionParams[8].presRegion = &m_resBrcDataBuffer;
5204 }
5205
5206 // Tile reset case, use previous frame BRC data
5207 if ((m_numPipe != m_numPipePre) && IsFirstPass())
5208 {
5209 if (m_numPipePre > 1)
5210 {
5211 virtualAddrParams->regionParams[8].presRegion = &m_resBrcDataBuffer;
5212 }
5213 else
5214 {
5215 virtualAddrParams->regionParams[8].presRegion = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
5216 }
5217 }
5218
5219 return eStatus;
5220 }
5221
SetRegionsHuCTileRowBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)5222 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCTileRowBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
5223 {
5224 CODECHAL_ENCODE_FUNCTION_ENTER;
5225
5226 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5227
5228 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::SetRegionsHuCBrcUpdate(virtualAddrParams));
5229
5230 // For tile replay, the tile based statistics is directly passed to HUC kernel
5231 virtualAddrParams->regionParams[1].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 1 � VDEnc Statistics Buffer (Input)
5232 virtualAddrParams->regionParams[1].dwOffset = m_hevcTileStatsOffset.uiVdencStatistics;
5233
5234 virtualAddrParams->regionParams[2].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 2 � PAK Statistics Buffer (Input)
5235 virtualAddrParams->regionParams[2].dwOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
5236
5237 virtualAddrParams->regionParams[7].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 7 � Slice Stat Streamout (Input)
5238 virtualAddrParams->regionParams[7].dwOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
5239
5240 virtualAddrParams->regionParams[12].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 12 � Tile encoded information (Input)
5241
5242 return eStatus;
5243 }
5244
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)5245 void CodechalVdencHevcStateG12::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
5246 {
5247 CODECHAL_ENCODE_FUNCTION_ENTER;
5248
5249 CodechalVdencHevcState::SetHcpSliceStateCommonParams(sliceStateParams);
5250
5251 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceStateParams).dwNumPipe = m_numPipe;
5252
5253 if (m_enableSCC)
5254 {
5255 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceStateParams).ucRecNotFilteredID = m_slotForRecNotFiltered;
5256 }
5257 }
5258
SetHcpSliceStateParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,bool lastSliceInTile,uint32_t idx)5259 void CodechalVdencHevcStateG12::SetHcpSliceStateParams(
5260 MHW_VDBOX_HEVC_SLICE_STATE& sliceState,
5261 PCODEC_ENCODER_SLCDATA slcData,
5262 uint16_t slcCount,
5263 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams,
5264 bool lastSliceInTile,
5265 uint32_t idx)
5266 {
5267 CODECHAL_ENCODE_FUNCTION_ENTER;
5268
5269 CodechalEncodeHevcBase::SetHcpSliceStateParams(sliceState, slcData, slcCount);
5270
5271 sliceState.bLastSliceInTile = lastSliceInTile ? true : false;
5272 sliceState.bLastSliceInTileColumn = (lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn) ? true : false;
5273 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12&>(sliceState).pTileCodingParams = tileCodingParams + idx;
5274 static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12&>(sliceState).dwTileID = idx;
5275
5276 // update pass status
5277 if (m_enableTileReplay && m_FrameLevelBRCForTileRow)
5278 {
5279 sliceState.bFirstPass = true;
5280 sliceState.bLastPass = false;
5281 }
5282 else if (m_enableTileReplay && m_TileRowLevelBRC)
5283 {
5284 sliceState.bFirstPass = IsFirstPassForTileReplay();
5285 sliceState.bLastPass = IsLastPassForTileReplay();
5286 }
5287 }
5288
SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)5289 void CodechalVdencHevcStateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
5290 {
5291 CODECHAL_ENCODE_FUNCTION_ENTER;
5292
5293 CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams);
5294
5295 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(vdboxPipeModeSelectParams);
5296
5297 if (m_numPipe > 1)
5298 {
5299 // Running in the multiple VDBOX mode
5300 if (IsFirstPipe())
5301 {
5302 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
5303 }
5304 else if (IsLastPipe())
5305 {
5306 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
5307 }
5308 else
5309 {
5310 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
5311 }
5312 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
5313 }
5314 else
5315 {
5316 pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
5317 pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
5318 }
5319
5320 // In single pipe mode, if TileBasedReplayMode is enabled, the bit stream for each tile will not be continuous
5321 if (m_hevcPicParams->tiles_enabled_flag)
5322 {
5323 pipeModeSelectParams.bTileBasedReplayMode = m_enableTileReplay;
5324 }
5325 else
5326 {
5327 pipeModeSelectParams.bTileBasedReplayMode = 0;
5328 }
5329
5330 // To enable VDENC/PAK statistics stream out for BRC only
5331 // Is stream out needed for ACQP? check this out!
5332 pipeModeSelectParams.bBRCEnabled = m_hevcVdencAcqpEnabled || m_vdencBrcEnabled;
5333 }
5334
SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & vdboxPipeModeSelectParams)5335 void CodechalVdencHevcStateG12::SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& vdboxPipeModeSelectParams)
5336 {
5337 CODECHAL_ENCODE_FUNCTION_ENTER;
5338
5339 CodechalVdencHevcState::SetVdencPipeModeSelectParams(vdboxPipeModeSelectParams);
5340
5341 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParams = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(vdboxPipeModeSelectParams);
5342
5343 // Enable RGB encoding
5344 pipeModeSelectParams.bRGBEncodingMode = m_RGBEncodingEnable;
5345
5346 // Capture mode enable
5347 pipeModeSelectParams.bWirelessEncodeEnabled = m_CaptureModeEnable;
5348 pipeModeSelectParams.ucWirelessSessionId = 0;
5349
5350 // Set random access flag
5351 pipeModeSelectParams.bIsRandomAccess = !m_lowDelay;
5352
5353 // Set lookahead pass flag
5354 pipeModeSelectParams.bLookaheadPass = m_lookaheadPass;
5355
5356 #ifdef _ENCODE_VDENC_RESERVED
5357 if (m_rsvdState)
5358 {
5359 m_rsvdState->SetVdencPipeModeSelectParams(pipeModeSelectParams);
5360 }
5361 #endif
5362
5363 if (m_enableSCC && (m_hevcPicParams->pps_curr_pic_ref_enabled_flag || m_hevcSeqParams->palette_mode_enabled_flag))
5364 {
5365 pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = false;
5366 }
5367 }
5368
SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)5369 void CodechalVdencHevcStateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
5370 {
5371 CODECHAL_ENCODE_FUNCTION_ENTER;
5372
5373 CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams);
5374
5375 //set MMC flag
5376 if (m_mmcState->IsMmcEnabled())
5377 {
5378 pipeBufAddrParams.bMmcEnabled = true;
5379 }
5380
5381 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
5382 if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1))
5383 {
5384 pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource;
5385 pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
5386 pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource;
5387 pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
5388 }
5389
5390 // SAO Row Store is GEN12 specific
5391 pipeBufAddrParams.presSaoRowStoreBuffer = &m_vdencSAORowStoreBuffer;
5392
5393 // Set up the recon not filtered surface for IBC
5394 if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
5395 {
5396 // I frame is much simpler
5397 if (m_pictureCodingType == I_TYPE)
5398 {
5399 pipeBufAddrParams.presReferences[0] = &m_vdencRecNotFilteredBuffer;
5400 m_slotForRecNotFiltered = 0;
5401 }
5402 // B frame
5403 else
5404 {
5405 unsigned int i;
5406 // Find one available slot
5407 for (i = 0; i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC; i++)
5408 {
5409 if (pipeBufAddrParams.presReferences[i] == nullptr)
5410 {
5411 break;
5412 }
5413 }
5414
5415 CODECHAL_ENCODE_ASSERT(i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC);
5416
5417 //record the slot for HCP_REF_IDX_STATE
5418 m_slotForRecNotFiltered = (unsigned char)i;
5419 pipeBufAddrParams.presReferences[i] = &m_vdencRecNotFilteredBuffer;
5420 }
5421 }
5422 }
5423
SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE & picStateParams)5424 void CodechalVdencHevcStateG12::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE& picStateParams)
5425 {
5426 CODECHAL_ENCODE_FUNCTION_ENTER;
5427
5428 CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams);
5429 if (m_enableSCC)
5430 {
5431 MHW_VDBOX_HEVC_PIC_STATE_G12& picStateParamsGen12 = dynamic_cast<MHW_VDBOX_HEVC_PIC_STATE_G12&>(picStateParams);
5432 picStateParamsGen12.ucRecNotFilteredID = m_slotForRecNotFiltered;
5433 picStateParamsGen12.IBCControl = m_enableLBCOnly ? SCC_IBC_CONTROL_IBC_ONLY_LBC_G12 : SCC_IBC_CONTROL_IBC_ENABLED_TBCLBC_G12;
5434 }
5435 }
5436
AddHcpRefIdxCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)5437 MOS_STATUS CodechalVdencHevcStateG12::AddHcpRefIdxCmd(
5438 PMOS_COMMAND_BUFFER cmdBuffer,
5439 PMHW_BATCH_BUFFER batchBuffer,
5440 PMHW_VDBOX_HEVC_SLICE_STATE params)
5441 {
5442 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5443
5444 CODECHAL_ENCODE_FUNCTION_ENTER;
5445
5446 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
5447 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
5448 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
5449
5450 if (cmdBuffer == nullptr && batchBuffer == nullptr)
5451 {
5452 CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
5453 return MOS_STATUS_NULL_POINTER;
5454 }
5455
5456 PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
5457 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
5458
5459 if ((hevcPicParams->pps_curr_pic_ref_enabled_flag) || (hevcSlcParams->slice_type != CODECHAL_ENCODE_HEVC_I_SLICE))
5460 {
5461 MHW_VDBOX_HEVC_REF_IDX_PARAMS_G12 refIdxParams;
5462
5463 refIdxParams.CurrPic = hevcPicParams->CurrReconstructedPic;
5464 refIdxParams.isEncode = true;
5465 refIdxParams.ucList = LIST_0;
5466 refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l0_active_minus1 + 1;
5467 eStatus = MOS_SecureMemcpy(&refIdxParams.RefPicList, sizeof(refIdxParams.RefPicList),
5468 &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList));
5469 if (eStatus != MOS_STATUS_SUCCESS)
5470 {
5471 CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
5472 return eStatus;
5473 }
5474
5475 refIdxParams.hevcRefList = (void**)m_refList;
5476 refIdxParams.poc_curr_pic = hevcPicParams->CurrPicOrderCnt;
5477 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
5478 {
5479 refIdxParams.poc_list[i] = hevcPicParams->RefFramePOCList[i];
5480 }
5481
5482 refIdxParams.pRefIdxMapping = params->pRefIdxMapping;
5483 refIdxParams.RefFieldPicFlag = 0; // there is no interlaced support in encoder
5484 refIdxParams.RefBottomFieldFlag = 0; // there is no interlaced support in encoder
5485
5486 if (m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
5487 {
5488 refIdxParams.bIBCEnabled = true;
5489 refIdxParams.ucRecNotFilteredID = m_slotForRecNotFiltered;
5490
5491 if ((m_hevcPicParams->CodingType == I_TYPE) && (m_hevcSliceParams->slice_type == MhwVdboxHcpInterface::hevcSliceP))
5492 {
5493 refIdxParams.ucNumRefForList = 0;
5494 }
5495 }
5496
5497 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
5498
5499 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
5500 {
5501 refIdxParams.ucList = LIST_1;
5502 refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l1_active_minus1 + 1;
5503 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams));
5504 }
5505 }
5506
5507 return eStatus;
5508 }
5509
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)5510 void CodechalVdencHevcStateG12::SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
5511 {
5512 CODECHAL_ENCODE_FUNCTION_ENTER;
5513
5514 CodechalVdencHevcState::SetVdencPipeBufAddrParams(pipeBufAddrParams);
5515
5516 PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex];
5517 if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource))
5518 {
5519 pipeBufAddrParams.presVdencStreamOutBuffer = &tileStatisticsBuffer->sResource;
5520 pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_hevcTileStatsOffset.uiVdencStatistics;
5521 }
5522
5523 // Set up the recon not filtered surface for IBC
5524 if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
5525 {
5526 // I frame is much simpler
5527 if (m_pictureCodingType == I_TYPE)
5528 {
5529 pipeBufAddrParams.presVdencReferences[0] = &m_vdencRecNotFilteredBuffer;
5530 }
5531 // LDB
5532 else
5533 {
5534 unsigned int i;
5535
5536 // Find one available slot
5537 for (i = 0; i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC; i++)
5538 {
5539 if (pipeBufAddrParams.presVdencReferences[i] == nullptr)
5540 {
5541 break;
5542 }
5543 }
5544
5545 CODECHAL_ENCODE_ASSERT(i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC);
5546 if (i != 0)
5547 {
5548 pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 += 1;
5549 }
5550 pipeBufAddrParams.presVdencReferences[i] = &m_vdencRecNotFilteredBuffer;
5551 }
5552 }
5553
5554 pipeBufAddrParams.presVdencTileRowStoreBuffer = &m_vdencTileRowStoreBuffer;
5555 pipeBufAddrParams.presVdencCumulativeCuCountStreamoutSurface = &m_vdencCumulativeCuCountStreamoutSurface;
5556 pipeBufAddrParams.isLowDelayB = m_lowDelay;
5557 }
5558
SetKernelParams(EncOperation operation,MHW_KERNEL_PARAM * kernelParams)5559 MOS_STATUS CodechalVdencHevcStateG12::SetKernelParams(
5560 EncOperation operation,
5561 MHW_KERNEL_PARAM *kernelParams)
5562 {
5563 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5564
5565 CODECHAL_ENCODE_FUNCTION_ENTER;
5566
5567 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
5568
5569 auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
5570
5571 kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
5572 kernelParams->iIdCount = 1;
5573
5574 switch (operation)
5575 {
5576 case VDENC_ME_P:
5577 case VDENC_ME_B:
5578 case VDENC_STREAMIN:
5579 case VDENC_STREAMIN_HEVC:
5580 case VDENC_STREAMIN_HEVC_RAB:
5581 kernelParams->iBTCount = CODECHAL_VDENC_HME_END_G12 - CODECHAL_VDENC_HME_BEGIN_G12;
5582 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_HEVC_VP9_VDENC_ME_CURBE_G12), (size_t)curbeAlignment);
5583 kernelParams->iBlockWidth = 32;
5584 kernelParams->iBlockHeight = 32;
5585 break;
5586 default:
5587 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
5588 eStatus = MOS_STATUS_INVALID_PARAMETER;
5589 }
5590
5591 return eStatus;
5592 }
5593
SetBindingTable(EncOperation operation,PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)5594 MOS_STATUS CodechalVdencHevcStateG12::SetBindingTable(
5595 EncOperation operation,
5596 PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable)
5597 {
5598 CODECHAL_ENCODE_FUNCTION_ENTER;
5599
5600 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5601
5602 CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
5603
5604 MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
5605
5606 switch (operation)
5607 {
5608 case VDENC_ME_P:
5609 case VDENC_ME_B:
5610 case VDENC_STREAMIN:
5611 case VDENC_STREAMIN_HEVC:
5612 case VDENC_STREAMIN_HEVC_RAB:
5613 bindingTable->dwNumBindingTableEntries = CODECHAL_VDENC_HME_END_G12 - CODECHAL_VDENC_HME_BEGIN_G12;
5614 bindingTable->dwBindingTableStartOffset = CODECHAL_VDENC_HME_BEGIN_G12;
5615 break;
5616 default:
5617 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
5618 return MOS_STATUS_INVALID_PARAMETER;
5619 }
5620
5621 for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
5622 {
5623 bindingTable->dwBindingTableEntries[i] = i;
5624 }
5625 return eStatus;
5626 }
5627
EncodeMeKernel(HmeLevel hmeLevel)5628 MOS_STATUS CodechalVdencHevcStateG12::EncodeMeKernel(HmeLevel hmeLevel)
5629 {
5630 CODECHAL_ENCODE_FUNCTION_ENTER;
5631
5632 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5633
5634 PMHW_KERNEL_STATE kernelState = nullptr;
5635 if(hmeLevel == HME_LEVEL_4x)
5636 {
5637 kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB;
5638 }
5639 else
5640 {
5641 kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB;
5642 }
5643 auto encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
5644 (hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
5645
5646 // If Single Task Phase is not enabled, use BT count for the kernel state.
5647 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
5648 {
5649 uint32_t maxBtCount = m_singleTaskPhaseSupported ?
5650 m_maxBtCount : kernelState->KernelParams.iBTCount;
5651 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
5652 m_stateHeapInterface,
5653 maxBtCount));
5654 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
5655 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
5656 }
5657
5658 // Set up the DSH/SSH as normal
5659 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
5660 m_stateHeapInterface,
5661 kernelState,
5662 false,
5663 0,
5664 false,
5665 m_storeData));
5666
5667 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
5668 MOS_ZeroMemory(&idParams, sizeof(idParams));
5669 idParams.pKernelState = kernelState;
5670 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
5671 m_stateHeapInterface,
5672 1,
5673 &idParams));
5674
5675 //Setup curbe for StreamIn Kernel
5676 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbe(hmeLevel));
5677
5678 CODECHAL_DEBUG_TOOL(
5679 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5680 encFunctionType,
5681 MHW_DSH_TYPE,
5682 kernelState));
5683 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
5684 encFunctionType,
5685 kernelState));
5686 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5687 encFunctionType,
5688 MHW_ISH_TYPE,
5689 kernelState));
5690 )
5691
5692 MOS_COMMAND_BUFFER cmdBuffer;
5693 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
5694
5695 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
5696 sendKernelCmdsParams.EncFunctionType = encFunctionType;
5697 sendKernelCmdsParams.pKernelState = kernelState;
5698 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
5699
5700 // Add binding table
5701 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
5702 m_stateHeapInterface,
5703 kernelState));
5704
5705 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(hmeLevel, &cmdBuffer));
5706
5707 // Dump SSH for ME kernel
5708 CODECHAL_DEBUG_TOOL(
5709 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
5710 encFunctionType,
5711 MHW_SSH_TYPE,
5712 kernelState)));
5713
5714 uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
5715 (hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
5716
5717 uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
5718 uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
5719
5720 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
5721 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
5722 walkerCodecParams.WalkerMode = m_walkerMode;
5723 walkerCodecParams.dwResolutionX = resolutionX;
5724 walkerCodecParams.dwResolutionY = resolutionY;
5725 walkerCodecParams.bNoDependency = true;
5726 walkerCodecParams.bMbaff = false;
5727 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
5728 walkerCodecParams.ucGroupId = m_groupId;
5729
5730 MHW_WALKER_PARAMS walkerParams;
5731 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
5732 m_hwInterface,
5733 &walkerParams,
5734 &walkerCodecParams));
5735
5736 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
5737 &cmdBuffer,
5738 &walkerParams));
5739
5740 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
5741
5742 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5743 {
5744 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
5745 m_stateHeapInterface));
5746 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
5747 }
5748
5749 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
5750 &cmdBuffer,
5751 encFunctionType,
5752 nullptr)));
5753
5754 m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
5755
5756 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
5757
5758 MHW_MI_STORE_DATA_PARAMS storeDataParams;
5759
5760 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
5761 {
5762 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
5763 m_lastTaskInPhase = false;
5764 }
5765 return eStatus;
5766 }
5767
SetMeCurbe(HmeLevel hmeLevel)5768 MOS_STATUS CodechalVdencHevcStateG12::SetMeCurbe(HmeLevel hmeLevel)
5769 {
5770 CODECHAL_ENCODE_FUNCTION_ENTER;
5771
5772 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5773
5774 CODECHAL_VDENC_HEVC_ME_CURBE_G12 curbe;
5775 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
5776 &curbe,
5777 sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G12),
5778 ME_CURBE_INIT_G12,
5779 sizeof(CODECHAL_VDENC_HEVC_ME_CURBE_G12)));
5780
5781 PMHW_KERNEL_STATE kernelState = nullptr;
5782 if(hmeLevel == HME_LEVEL_4x)
5783 {
5784 kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB;
5785 }
5786 else
5787 {
5788 kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB;
5789 }
5790
5791 bool useMvFromPrevStep;
5792 bool writeDistortions;
5793 uint32_t scaleFactor;
5794 uint32_t mvShiftFactor = 0;
5795 uint32_t prevMvReadPosFactor = 0;
5796
5797 switch (hmeLevel)
5798 {
5799 case HME_LEVEL_32x:
5800 useMvFromPrevStep = false;
5801 writeDistortions = false;
5802 scaleFactor = SCALE_FACTOR_32x;
5803 mvShiftFactor = 1;
5804 prevMvReadPosFactor = 0;
5805 break;
5806 case HME_LEVEL_16x:
5807 useMvFromPrevStep = (m_b32XMeEnabled) ? true : false;
5808 writeDistortions = false;
5809 scaleFactor = SCALE_FACTOR_16x;
5810 mvShiftFactor = 2;
5811 prevMvReadPosFactor = 1;
5812 break;
5813 case HME_LEVEL_4x:
5814 useMvFromPrevStep = (m_b16XMeEnabled) ? true : false;
5815 writeDistortions = true;
5816 scaleFactor = SCALE_FACTOR_4x;
5817 mvShiftFactor = 2;
5818 prevMvReadPosFactor = 0;
5819 break;
5820 default:
5821 eStatus = MOS_STATUS_INVALID_PARAMETER;
5822 return eStatus;
5823 break;
5824 }
5825
5826 curbe.DW3.SubPelMode = 3;
5827 curbe.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
5828 curbe.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
5829 curbe.DW5.QpPrimeY = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
5830 curbe.DW6.WriteDistortions = writeDistortions;
5831 curbe.DW6.UseMvFromPrevStep = useMvFromPrevStep;
5832 curbe.DW6.SuperCombineDist = 5;//SuperCombineDist_Generic[pHevcSeqParams->TargetUsage]; Harded coded in KCM
5833 curbe.DW6.MaxVmvR = 511 * 4;
5834 curbe.DW15.MvShiftFactor = mvShiftFactor;
5835 curbe.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
5836
5837 if (m_pictureCodingType == B_TYPE)
5838 {
5839 // This field is irrelevant since we are not using the bi-direct search.
5840 curbe.DW1.BiWeight = m_bframeMeBidirectionalWeight;
5841 curbe.DW13.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
5842 }
5843
5844 if (m_pictureCodingType == P_TYPE || m_pictureCodingType == B_TYPE)
5845 {
5846 curbe.DW13.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
5847 }
5848
5849 if (hmeLevel == HME_LEVEL_4x)
5850 {
5851 curbe.DW30.ActualMBHeight = m_frameHeight;
5852 curbe.DW30.ActualMBWidth = m_frameWidth;
5853 }
5854 else
5855 {
5856 curbe.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
5857 curbe.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
5858 }
5859
5860 curbe.DW13.RefStreaminCost = 0;
5861 // This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
5862 curbe.DW13.ROIEnable = 0;
5863
5864 uint8_t meMethod = (m_pictureCodingType == B_TYPE) ? m_bMeMethodGeneric[m_hevcSeqParams->TargetUsage] : m_meMethodGeneric[m_hevcSeqParams->TargetUsage];
5865 uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
5866 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe.SPDelta), 14 * sizeof(uint32_t),
5867 m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t)));
5868
5869 if (hmeLevel == HME_LEVEL_4x)
5870 {
5871 //StreamIn CURBE
5872 curbe.DW6.LCUSize = 1;//Only LCU64 supported by the VDEnc HW
5873 // Kernel should use driver-prepared stream-in surface during ROI/ MBQP(LCUQP)/ Dirty-Rect
5874 curbe.DW6.InputStreamInEn = (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)));
5875 curbe.DW31.MaxCuSize = 3;
5876 curbe.DW31.MaxTuSize = 3;
5877 switch (m_hevcSeqParams->TargetUsage)
5878 {
5879 case 1:
5880 case 4:
5881 curbe.DW36.NumMergeCandCu64x64 = 4;
5882 curbe.DW36.NumMergeCandCu32x32 = 3;
5883 curbe.DW36.NumMergeCandCu16x16 = 2;
5884 curbe.DW36.NumMergeCandCu8x8 = 1;
5885 curbe.DW31.NumImePredictors = m_imgStateImePredictors;
5886 break;
5887 case 7:
5888 curbe.DW36.NumMergeCandCu64x64 = 2;
5889 curbe.DW36.NumMergeCandCu32x32 = 2;
5890 curbe.DW36.NumMergeCandCu16x16 = 2;
5891 curbe.DW36.NumMergeCandCu8x8 = 0;
5892 curbe.DW31.NumImePredictors = 4;
5893 break;
5894 }
5895 }
5896
5897 curbe.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G12;
5898 curbe.DW41._16xOr32xMeMvInputDataSurfIndex = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G12 : CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G12;
5899 curbe.DW42._4xMeOutputDistSurfIndex = CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G12;
5900 curbe.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_VDENC_HME_BRC_DISTORTION_CM_G12;
5901 curbe.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G12;
5902 curbe.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G12;
5903 curbe.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G12;
5904 curbe.DW47.VDEncStreamInInputSurfIndex = CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G12;
5905
5906 CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
5907 &curbe,
5908 kernelState->dwCurbeOffset,
5909 sizeof(curbe)));
5910
5911 return eStatus;
5912 }
5913
SendMeSurfaces(HmeLevel hmeLevel,PMOS_COMMAND_BUFFER cmdBuffer)5914 MOS_STATUS CodechalVdencHevcStateG12::SendMeSurfaces(HmeLevel hmeLevel, PMOS_COMMAND_BUFFER cmdBuffer)
5915 {
5916 CODECHAL_ENCODE_FUNCTION_ENTER;
5917 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5918
5919 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
5920
5921 MOS_SURFACE *meMvDataBuffer;
5922 uint32_t downscaledWidthInMb;
5923 uint32_t downscaledHeightInMb;
5924
5925 if (hmeLevel == HME_LEVEL_32x)
5926 {
5927 meMvDataBuffer = &m_s32XMeMvDataBuffer;
5928 downscaledWidthInMb = m_downscaledWidthInMb32x;
5929 downscaledHeightInMb = m_downscaledHeightInMb32x;
5930 }
5931 else if (hmeLevel == HME_LEVEL_16x)
5932 {
5933 meMvDataBuffer = &m_s16XMeMvDataBuffer;
5934 downscaledWidthInMb = m_downscaledWidthInMb16x;
5935 downscaledHeightInMb = m_downscaledHeightInMb16x;
5936 }
5937 else
5938 {
5939 meMvDataBuffer = &m_s4XMeMvDataBuffer;
5940 downscaledWidthInMb = m_downscaledWidthInMb4x;
5941 downscaledHeightInMb = m_downscaledHeightInMb4x;
5942 }
5943
5944 auto width = MOS_ALIGN_CEIL(downscaledWidthInMb * 32, 64);
5945 auto height = downscaledHeightInMb * 4 * 10;
5946 // Force the values
5947 meMvDataBuffer->dwWidth = width;
5948 meMvDataBuffer->dwHeight = height;
5949 meMvDataBuffer->dwPitch = width;
5950
5951 PMHW_KERNEL_STATE kernelState = nullptr;
5952 if(hmeLevel == HME_LEVEL_4x)
5953 {
5954 kernelState = m_lowDelay ? &m_vdencStreaminKernelState : &m_vdencStreaminKernelStateRAB;
5955 }
5956 else
5957 {
5958 kernelState = m_lowDelay ? &m_vdencMeKernelState : &m_vdencMeKernelStateRAB;
5959 }
5960 auto bindingTable = (hmeLevel == HME_LEVEL_4x) ?
5961 &m_vdencStreaminKernelBindingTable : &m_vdencMeKernelBindingTable;
5962 uint32_t meMvBottomFieldOffset = 0;
5963
5964 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
5965 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
5966 surfaceCodecParams.bIs2DSurface = true;
5967 surfaceCodecParams.bMediaBlockRW = true;
5968 surfaceCodecParams.psSurface = meMvDataBuffer;
5969 surfaceCodecParams.dwOffset = meMvBottomFieldOffset;
5970 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
5971 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_MV_DATA_SURFACE_CM_G12];
5972 surfaceCodecParams.bIsWritable = true;
5973 surfaceCodecParams.bRenderTarget = true;
5974 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5975 m_hwInterface,
5976 cmdBuffer,
5977 &surfaceCodecParams,
5978 kernelState));
5979
5980 if (hmeLevel == HME_LEVEL_16x && m_b32XMeEnabled)
5981 {
5982 // Pass 32x MV to 16x ME operation
5983 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
5984 surfaceCodecParams.bIs2DSurface = true;
5985 surfaceCodecParams.bMediaBlockRW = true;
5986 surfaceCodecParams.psSurface = &m_s32XMeMvDataBuffer;
5987 surfaceCodecParams.dwOffset = 0;
5988 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
5989 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_32xME_MV_DATA_SURFACE_CM_G12];
5990 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
5991 m_hwInterface,
5992 cmdBuffer,
5993 &surfaceCodecParams,
5994 kernelState));
5995 }
5996 else if (!(hmeLevel == HME_LEVEL_32x) && m_b16XMeEnabled)
5997 {
5998 // Pass 16x MV to 4x ME operation
5999 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6000 surfaceCodecParams.bIs2DSurface = true;
6001 surfaceCodecParams.bMediaBlockRW = true;
6002 surfaceCodecParams.psSurface = &m_s16XMeMvDataBuffer;
6003 surfaceCodecParams.dwOffset = 0;
6004 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_MV_DATA_ENCODE].Value;
6005 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_16xME_MV_DATA_SURFACE_CM_G12];
6006 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6007 m_hwInterface,
6008 cmdBuffer,
6009 &surfaceCodecParams,
6010 kernelState));
6011
6012 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6013 surfaceCodecParams.bIs2DSurface = true;
6014 surfaceCodecParams.bMediaBlockRW = true;
6015 surfaceCodecParams.psSurface = &m_s4XMeDistortionBuffer;
6016 surfaceCodecParams.dwOffset = 0;
6017 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_DISTORTION_SURFACE_CM_G12];
6018 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
6019 surfaceCodecParams.bIsWritable = true;
6020 surfaceCodecParams.bRenderTarget = true;
6021 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6022 m_hwInterface,
6023 cmdBuffer,
6024 &surfaceCodecParams,
6025 kernelState));
6026 }
6027
6028 PMOS_SURFACE currScaledSurface = (hmeLevel == HME_LEVEL_4x) ? m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER) :
6029 ((hmeLevel == HME_LEVEL_16x) ? m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER) : m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER));
6030 MOS_SURFACE refScaledSurface = *currScaledSurface;
6031 bool currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic) ? true : false;
6032 bool currBottomField = CodecHal_PictureIsBottomField(m_currOriginalPic) ? true : false;
6033
6034 uint8_t currVDirection = (!currFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
6035 ((currBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6036 uint32_t currScaledBottomFieldOffset = (hmeLevel == HME_LEVEL_4x) ?
6037 (uint32_t)m_scaledBottomFieldOffset : ((hmeLevel == HME_LEVEL_16x) ? (uint32_t)m_scaled16xBottomFieldOffset : (uint32_t)m_scaled32xBottomFieldOffset);
6038
6039 // Setup references 1...n
6040 // LIST 0 references
6041 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
6042 {
6043 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][refIdx];
6044
6045 if (!CodecHal_PictureIsInvalid(refPic))
6046 {
6047 if (refIdx == 0)
6048 {
6049 // Current Picture Y - VME
6050 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6051 surfaceCodecParams.bUseAdvState = true;
6052 surfaceCodecParams.psSurface = currScaledSurface;
6053 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
6054 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
6055 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_FWD_REF_CM_G12];
6056 surfaceCodecParams.ucVDirection = currVDirection;
6057 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6058 m_hwInterface,
6059 cmdBuffer,
6060 &surfaceCodecParams,
6061 kernelState));
6062 }
6063
6064 bool refFieldPicture = CodecHal_PictureIsField(refPic) ? true : false;
6065 bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? true : false;
6066 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
6067 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
6068 if (hmeLevel == HME_LEVEL_4x)
6069 {
6070 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
6071 }
6072 else if (hmeLevel == HME_LEVEL_16x)
6073 {
6074 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
6075 }
6076 else
6077 {
6078 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
6079 }
6080 uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
6081
6082 // L0 Reference Picture Y - VME
6083 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6084 surfaceCodecParams.bUseAdvState = true;
6085 surfaceCodecParams.psSurface = &refScaledSurface;
6086 surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
6087 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
6088 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_FWD_REF_IDX0_CM_G12 + (refIdx * 2)];
6089 surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
6090 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6091 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6092 m_hwInterface,
6093 cmdBuffer,
6094 &surfaceCodecParams,
6095 kernelState));
6096
6097 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_RESERVED1_CM_G12 + (refIdx * 2)];
6098 surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
6099 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6100 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6101 m_hwInterface,
6102 cmdBuffer,
6103 &surfaceCodecParams,
6104 kernelState));
6105 }
6106 }
6107
6108 //List1
6109 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
6110 {
6111 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][refIdx];
6112
6113 if (!CodecHal_PictureIsInvalid(refPic))
6114 {
6115 if (refIdx == 0)
6116 {
6117 // Current Picture Y - VME
6118 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6119 surfaceCodecParams.bUseAdvState = true;
6120 surfaceCodecParams.psSurface = currScaledSurface;
6121 surfaceCodecParams.dwOffset = currBottomField ? currScaledBottomFieldOffset : 0;
6122 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
6123 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_CURR_FOR_BWD_REF_CM_G12];
6124 surfaceCodecParams.ucVDirection = currVDirection;
6125 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6126 m_hwInterface,
6127 cmdBuffer,
6128 &surfaceCodecParams,
6129 kernelState));
6130 }
6131
6132 bool refFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
6133 bool refBottomField = CodecHal_PictureIsBottomField(refPic) ? 1 : 0;
6134 auto refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
6135 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
6136
6137 if (hmeLevel == HME_LEVEL_4x)
6138 {
6139 refScaledSurface.OsResource = m_trackedBuf->Get4xDsSurface(scaledIdx)->OsResource;
6140 }
6141 else if (hmeLevel == HME_LEVEL_16x)
6142 {
6143 refScaledSurface.OsResource = m_trackedBuf->Get16xDsSurface(scaledIdx)->OsResource;
6144 }
6145 else
6146 {
6147 refScaledSurface.OsResource = m_trackedBuf->Get32xDsSurface(scaledIdx)->OsResource;
6148 }
6149 uint32_t refScaledBottomFieldOffset = refBottomField ? currScaledBottomFieldOffset : 0;
6150
6151 // L1 Reference Picture Y - VME
6152 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6153 surfaceCodecParams.bUseAdvState = true;
6154 surfaceCodecParams.psSurface = &refScaledSurface;
6155 surfaceCodecParams.dwOffset = refBottomField ? refScaledBottomFieldOffset : 0;
6156 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
6157 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_BWD_REF_IDX0_CM_G12 + (refIdx * 2)];
6158 surfaceCodecParams.ucVDirection = !currFieldPicture ? CODECHAL_VDIRECTION_FRAME :
6159 ((refBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
6160 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6161 m_hwInterface,
6162 cmdBuffer,
6163 &surfaceCodecParams,
6164 kernelState));
6165
6166 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_RESERVED9_CM_G12 + (refIdx * 2)];
6167 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6168 m_hwInterface,
6169 cmdBuffer,
6170 &surfaceCodecParams,
6171 kernelState));
6172 }
6173 }
6174
6175 if (hmeLevel == HME_LEVEL_4x)
6176 {
6177 CODECHAL_ENCODE_CHK_NULL_RETURN(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
6178
6179 auto streamingSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
6180
6181 // Send driver-prepared stream-in surface as input during ROI/ MBQP(LCUQP)/ Dirty-Rect
6182 if (m_hevcPicParams->NumROI || m_encodeParams.bMbQpDataEnabled || (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
6183 {
6184 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6185 surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
6186 surfaceCodecParams.bIs2DSurface = false;
6187 surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
6188 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
6189 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_INPUT_CM_G12];
6190 surfaceCodecParams.bIsWritable = true;
6191 surfaceCodecParams.bRenderTarget = true;
6192 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6193 m_hwInterface,
6194 cmdBuffer,
6195 &surfaceCodecParams,
6196 kernelState));
6197 }
6198 else // Clear stream-in surface otherwise
6199 {
6200 MOS_LOCK_PARAMS lockFlags;
6201 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
6202 lockFlags.WriteOnly = true;
6203
6204 auto data = m_osInterface->pfnLockResource(
6205 m_osInterface,
6206 &m_resVdencStreamInBuffer[m_currRecycledBufIdx],
6207 &lockFlags);
6208
6209 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
6210
6211 MOS_ZeroMemory(
6212 data,
6213 streamingSize);
6214
6215 m_osInterface->pfnUnlockResource(
6216 m_osInterface,
6217 &m_resVdencStreamInBuffer[m_currRecycledBufIdx]);
6218 }
6219
6220 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
6221 surfaceCodecParams.dwSize = MOS_BYTES_TO_DWORDS(streamingSize);
6222 surfaceCodecParams.bIs2DSurface = false;
6223 surfaceCodecParams.presBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
6224 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_VDENC_STREAMIN_CODEC].Value;
6225 surfaceCodecParams.dwBindingTableOffset = bindingTable->dwBindingTableEntries[CODECHAL_VDENC_HME_VDENC_STREAMIN_OUTPUT_CM_G12];
6226 surfaceCodecParams.bIsWritable = true;
6227 surfaceCodecParams.bRenderTarget = true;
6228 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
6229 m_hwInterface,
6230 cmdBuffer,
6231 &surfaceCodecParams,
6232 kernelState));
6233 }
6234
6235 return eStatus;
6236 }
6237
6238 MOS_STATUS
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)6239 CodechalVdencHevcStateG12::GetKernelHeaderAndSize(
6240 void *binary,
6241 EncOperation operation,
6242 uint32_t krnStateIdx,
6243 void *krnHeader,
6244 uint32_t *krnSize)
6245 {
6246 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6247
6248 CODECHAL_ENCODE_FUNCTION_ENTER;
6249
6250 CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
6251 CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
6252 CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
6253
6254 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(binary, operation, krnStateIdx, krnHeader, krnSize));
6255
6256 return eStatus;
6257 }
6258
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)6259 MOS_STATUS CodechalVdencHevcStateG12::AddVdencWalkerStateCmd(
6260 PMOS_COMMAND_BUFFER cmdBuffer,
6261 PMHW_VDBOX_HEVC_SLICE_STATE params)
6262 {
6263 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6264
6265 CODECHAL_ENCODE_FUNCTION_ENTER;
6266
6267 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
6268 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
6269
6270 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G12 vdencWalkerStateParams;
6271 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
6272 vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
6273 vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
6274 vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
6275 vdencWalkerStateParams.pTileCodingParams = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->pTileCodingParams;
6276 vdencWalkerStateParams.dwTileId = static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->dwTileID;
6277 switch (static_cast<PMHW_VDBOX_HEVC_SLICE_STATE_G12>(params)->dwNumPipe)
6278 {
6279 case 0:
6280 case 1:
6281 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
6282 break;
6283 case 2:
6284 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
6285 break;
6286 case 4:
6287 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
6288 break;
6289 default:
6290 vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
6291 CODECHAL_ENCODE_ASSERT(false);
6292 break;
6293 }
6294
6295 vdencWalkerStateParams.IBCControl =
6296 m_enableLBCOnly ? SCC_IBC_CONTROL_IBC_ONLY_LBC_G12 : SCC_IBC_CONTROL_IBC_ENABLED_TBCLBC_G12;
6297
6298 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
6299
6300 return eStatus;
6301 }
6302
GetSystemPipeNumberCommon()6303 MOS_STATUS CodechalVdencHevcStateG12::GetSystemPipeNumberCommon()
6304 {
6305 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6306
6307 CODECHAL_ENCODE_FUNCTION_ENTER;
6308
6309 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
6310 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6311
6312 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
6313 statusKey = MOS_UserFeature_ReadValue_ID(
6314 nullptr,
6315 __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
6316 &userFeatureData,
6317 m_osInterface->pOsContext);
6318
6319 bool disableScalability = m_hwInterface->IsDisableScalability();
6320 if (statusKey == MOS_STATUS_SUCCESS)
6321 {
6322 disableScalability = userFeatureData.i32Data ? true : false;
6323 }
6324
6325 MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface);
6326 CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo);
6327
6328 if (gtSystemInfo && disableScalability == false)
6329 {
6330 // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
6331 m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
6332 }
6333 else
6334 {
6335 m_numVdbox = 1;
6336 }
6337
6338 CODECHAL_ENCODE_VERBOSEMESSAGE("System VDBOX number = %d.", m_numVdbox);
6339
6340 return eStatus;
6341 }
6342
Initialize(CodechalSetting * settings)6343 MOS_STATUS CodechalVdencHevcStateG12::Initialize(CodechalSetting * settings)
6344 {
6345 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6346
6347 CODECHAL_ENCODE_FUNCTION_ENTER;
6348
6349 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
6350 // Tile Replay Enable should be passed from DDI, will change later when DDI is ready
6351 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6352 MOS_UserFeature_ReadValue_ID(
6353 nullptr,
6354 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_TILEREPLAY_ENABLE_ID,
6355 &userFeatureData,
6356 m_osInterface->pOsContext);
6357 m_enableTileReplay = userFeatureData.i32Data ? true : false;
6358
6359 m_skipFrameBasedHWCounterRead = m_enableTileReplay;
6360
6361 // RGB Encoding Enable should be passed from DDI, will change later when DDI is ready
6362 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6363 MOS_UserFeature_ReadValue_ID(
6364 nullptr,
6365 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_RGB_ENCODING_ENABLE_ID,
6366 &userFeatureData,
6367 m_osInterface->pOsContext);
6368 m_RGBEncodingEnable = userFeatureData.i32Data ? true : false;
6369
6370 // Capture mode with display Enable should be passed from DDI, will change later
6371 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6372 MOS_UserFeature_ReadValue_ID(
6373 nullptr,
6374 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_CAPTURE_MODE_ENABLE_ID,
6375 &userFeatureData,
6376 m_osInterface->pOsContext);
6377 m_CaptureModeEnable = userFeatureData.i32Data ? true : false;
6378
6379 #if (_DEBUG || _RELEASE_INTERNAL)
6380 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6381 MOS_UserFeature_ReadValue_ID(
6382 nullptr,
6383 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_TCBRC_ARB_DISABLE_ID,
6384 &userFeatureData,
6385 m_osInterface->pOsContext);
6386 m_brcAdaptiveRegionBoostSupported = userFeatureData.i32Data ? false : m_brcAdaptiveRegionBoostSupported;
6387 #endif
6388
6389 // common initilization
6390 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::Initialize(settings));
6391
6392 MEDIA_FEATURE_TABLE *skuTable = m_osInterface->pfnGetSkuTable(m_osInterface);
6393 if (m_osInterface->bSimIsActive && (m_enableTileReplay == true))
6394 {
6395 m_frameTrackingEnabled = false;
6396 }
6397
6398 // To do: current size assumes 8Kx8K max resolution. Needs to be increased based on Gen12, along with m_maxNumNativeROI.
6399 m_deltaQpRoiBufferSize = m_deltaQpBufferSize;
6400 m_brcRoiBufferSize = m_roiStreamInBufferSize;
6401 m_maxTileNumber = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) *
6402 CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE);
6403
6404 // we need additional buffer for (1) 1 CL for size info at the beginning of each tile column (max of 4 vdbox in scalability mode)
6405 // (2) CL alignment at end of every tile column
6406 // as a result, increase the height by 1 for allocation purposes
6407 m_numLcu = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * (MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE) + 1);
6408 m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * (m_numLcu * 5 + m_numLcu * 64 * 8), CODECHAL_PAGE_SIZE);
6409 m_mbCodeSize += m_mvOffset;
6410
6411 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon());
6412
6413 if (MOS_VE_SUPPORTED(m_osInterface))
6414 {
6415 m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
6416 CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
6417 //scalability initialize
6418 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
6419 }
6420
6421 // Caculate the size for 3nd level batch buffer
6422 // mhw_vdbox_hcp_g12_X::HCP_PIC_STATE_CMD::byteSize
6423 // As this buffer is going to passed to HuC to generate the command, must be page aligned
6424 // To add the HW interface get the buffer size later
6425
6426 m_thirdLBSize = MOS_ALIGN_CEIL(1024, CODECHAL_PAGE_SIZE);
6427
6428 // Caculate the batch buffer size for each tile
6429 // To add the MHW interface later, can be fine tuned
6430 m_tileLevelBatchSize = m_hwInterface->m_vdenc2ndLevelBatchBufferSize;
6431
6432 // Caculate the size for MV temporal buffer
6433 uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6)*((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE;
6434 uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5)*((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE;
6435 m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size);
6436
6437 m_sizeOfHcpPakFrameStats = 9 * CODECHAL_CACHELINE_SIZE;
6438
6439 #ifdef _ENCODE_VDENC_RESERVED
6440 InitReserveState(settings);
6441 #endif
6442 m_enableSCC = settings->isSCCEnabled;
6443
6444 #if (_DEBUG || _RELEASE_INTERNAL)
6445 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6446 // LBC only Enable should be passed from DDI, will change later when DDI is ready
6447 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6448 MOS_UserFeature_ReadValue_ID(
6449 nullptr,
6450 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_LBCONLY_ENABLE_ID,
6451 &userFeatureData,
6452 m_osInterface->pOsContext);
6453 m_enableLBCOnly = userFeatureData.i32Data ? true : false;
6454 #endif
6455
6456 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6457 MOS_UserFeature_ReadValue_ID(
6458 nullptr,
6459 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH,
6460 &userFeatureData,
6461 m_osInterface->pOsContext);
6462 m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
6463
6464 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6465 MOS_UserFeature_ReadValue_ID(
6466 nullptr,
6467 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE,
6468 &userFeatureData,
6469 m_osInterface->pOsContext);
6470 m_enableHWSemaphore = userFeatureData.i32Data ? true : false;
6471
6472 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6473 MOS_UserFeature_ReadValue_ID(
6474 nullptr,
6475 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VDBOX_HW_SEMAPHORE,
6476 &userFeatureData,
6477 m_osInterface->pOsContext);
6478 m_enableVdBoxHWSemaphore = userFeatureData.i32Data ? true : false;
6479
6480 // ACQP is now supported on Gen12 for TU1 / TU4
6481 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6482 MOS_UserFeature_ReadValue_ID(
6483 nullptr,
6484 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID,
6485 &userFeatureData,
6486 m_osInterface->pOsContext);
6487 m_hevcVdencAcqpEnabled = userFeatureData.i32Data ? true : false;
6488
6489 m_numDelay = 15;
6490
6491 #if (_DEBUG || _RELEASE_INTERNAL)
6492 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6493 MOS_UserFeature_ReadValue_ID(
6494 nullptr,
6495 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE,
6496 &userFeatureData,
6497 m_osInterface->pOsContext);
6498 m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data;
6499
6500 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6501 MOS_UserFeature_ReadValue_ID(
6502 nullptr,
6503 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_FORCE_SCALABILITY_ID,
6504 &userFeatureData,
6505 m_osInterface->pOsContext);
6506 m_forceScalability = userFeatureData.i32Data ? true : false;
6507 #endif
6508
6509 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6510 MOS_UserFeature_ReadValue_ID(
6511 nullptr,
6512 __MEDIA_USER_FEATURE_VALUE_HEVC_TCBRC_QUALITY_BOOST_ENABLE_ID,
6513 &userFeatureData,
6514 m_osInterface->pOsContext);
6515 m_tcbrcQualityBoost = (userFeatureData.i32Data) ? true : false;
6516
6517 return eStatus;
6518 }
6519
CodechalVdencHevcStateG12(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)6520 CodechalVdencHevcStateG12::CodechalVdencHevcStateG12(
6521 CodechalHwInterface* hwInterface,
6522 CodechalDebugInterface* debugInterface,
6523 PCODECHAL_STANDARD_INFO standardInfo)
6524 :CodechalVdencHevcState(hwInterface, debugInterface, standardInfo)
6525 {
6526 CODECHAL_ENCODE_FUNCTION_ENTER;
6527
6528 m_useCommonKernel = true;
6529 pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;
6530 m_useHwScoreboard = false;
6531 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
6532 m_kernelBase = (uint8_t*)IGCODECKRN_G12;
6533 #endif
6534 m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
6535 m_scalabilityState = nullptr;
6536 m_brcAdaptiveRegionBoostSupported = true;
6537
6538 MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData));
6539 MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData));
6540 MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer));
6541 MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer));
6542 MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer));
6543 MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride));
6544 MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer));
6545
6546 MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
6547 MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
6548 MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem));
6549 MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem));
6550 MOS_ZeroMemory(m_resVdBoxSemaphoreMem, sizeof(m_resVdBoxSemaphoreMem));
6551 MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem));
6552
6553 MOS_ZeroMemory(&m_vdencTileRowStoreBuffer, sizeof(m_vdencTileRowStoreBuffer));
6554 MOS_ZeroMemory(&m_thirdLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
6555 MOS_ZeroMemory(&m_vdencSAORowStoreBuffer, sizeof(m_vdencSAORowStoreBuffer));
6556
6557 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_osInterface);
6558 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
6559 {
6560 MOS_ZeroMemory(&m_tileLevelBatchBuffer[i], sizeof(PMHW_BATCH_BUFFER));
6561 MOS_ZeroMemory(&m_TileRowBRCBatchBuffer[i], sizeof(PMHW_BATCH_BUFFER));
6562 }
6563
6564 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
6565 {
6566 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
6567 {
6568 MOS_ZeroMemory(&m_resHucPakStitchDmemBuffer[k][i], sizeof(m_resHucPakStitchDmemBuffer[k][i]));
6569 }
6570 }
6571
6572 MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer));
6573 MOS_ZeroMemory(&m_resTileRowBRCsyncSemaphore, sizeof(m_resTileRowBRCsyncSemaphore));
6574
6575 m_vdencBrcInitDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_INIT_DMEM_G12);
6576 m_vdencBrcUpdateDmemBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_UPDATE_DMEM_G12);
6577 m_vdencBrcConstDataBufferSize = sizeof(CODECHAL_VDENC_HEVC_HUC_BRC_CONSTANT_DATA_G12);
6578 m_maxNumSlicesSupported = CODECHAL_VDENC_HEVC_MAX_SLICE_NUM;
6579
6580 m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
6581 m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;
6582 #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
6583 m_kernelBase = (uint8_t*)IGCODECKRN_G12;
6584 #endif
6585
6586 MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize(
6587 m_kernelBase,
6588 m_kuidCommon,
6589 &m_kernelBinary,
6590 &m_combinedKernelSize);
6591 CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
6592
6593 m_hwInterface->GetStateHeapSettings()->dwIshSize +=
6594 MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
6595
6596 m_hwInterface->m_hucCommandBufferSize += 64;
6597
6598 m_osInterface->pfnVirtualEngineSupported(m_osInterface, false, true);
6599 Mos_SetVirtualEngineSupported(m_osInterface, true);
6600
6601 CODECHAL_DEBUG_TOOL(
6602 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(m_encodeParState = MOS_New(CodechalDebugEncodeParG12, this));
6603 )
6604 }
6605
SetGpuCtxCreatOption()6606 MOS_STATUS CodechalVdencHevcStateG12::SetGpuCtxCreatOption()
6607 {
6608 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6609
6610 CODECHAL_ENCODE_FUNCTION_ENTER;
6611
6612 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
6613 {
6614 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SetGpuCtxCreatOption());
6615 }
6616 else
6617 {
6618 m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
6619 CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
6620
6621 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
6622 m_scalabilityState,
6623 (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
6624 }
6625
6626 return eStatus;
6627 }
6628
SetRegionsHuCPakIntegrate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6629 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCPakIntegrate(
6630 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
6631 {
6632 CODECHAL_ENCODE_FUNCTION_ENTER;
6633
6634 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6635
6636 int32_t currentPass = GetCurrentPass();
6637
6638 if(m_enableTileStitchByHW)
6639 {
6640 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6641 }
6642
6643 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6644 CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6645
6646 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6647
6648 // Add Virtual addr
6649 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
6650 virtualAddrParams->regionParams[0].dwOffset = 0;
6651 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
6652 virtualAddrParams->regionParams[1].isWritable = true;
6653 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
6654 virtualAddrParams->regionParams[4].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6655 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
6656 virtualAddrParams->regionParams[5].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6657 virtualAddrParams->regionParams[5].isWritable = true;
6658 virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
6659 virtualAddrParams->regionParams[6].isWritable = true;
6660 virtualAddrParams->regionParams[7].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource; // Region 7 - HCP PIC state command
6661 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
6662 virtualAddrParams->regionParams[9].isWritable = true;
6663 if (m_enableTileStitchByHW)
6664 {
6665 virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation
6666 virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
6667 virtualAddrParams->regionParams[10].isWritable = true;
6668 }
6669 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
6670 virtualAddrParams->regionParams[15].dwOffset = 0;
6671
6672 return eStatus;
6673 }
6674
ConfigStitchDataBuffer()6675 MOS_STATUS CodechalVdencHevcStateG12::ConfigStitchDataBuffer()
6676 {
6677 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6678 CODECHAL_ENCODE_FUNCTION_ENTER;
6679 int32_t currentPass = GetCurrentPass();
6680 if (currentPass < 0 ||
6681 (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled))
6682 {
6683 eStatus = MOS_STATUS_INVALID_PARAMETER;
6684 return eStatus;
6685 }
6686
6687 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6688 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6689 lockFlagsWriteOnly.WriteOnly = 1;
6690
6691 HucCommandDataVdencG12 *hucStitchDataBuf = (HucCommandDataVdencG12 *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
6692
6693 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandDataVdencG12));
6694 hucStitchDataBuf->TotalCommands = 1;
6695 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
6696
6697 HucInputCmdVdencG12 hucInputCmd;
6698 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdVdencG12));
6699
6700 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
6701 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
6702 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
6703 hucInputCmd.LengthOfTable = (uint8_t)(m_numTiles);
6704 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;
6705
6706 PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource;
6707
6708 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6709 m_osInterface,
6710 presSrc,
6711 false,
6712 false));
6713 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
6714 m_osInterface,
6715 &m_resBitstreamBuffer,
6716 true,
6717 true));
6718
6719 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
6720 uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
6721 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
6722 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
6723
6724 hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
6725 hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
6726
6727 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdVdencG12), &hucInputCmd, sizeof(HucInputCmdVdencG12));
6728
6729 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
6730
6731 return eStatus;
6732 }
6733
SetRegionsHuCPakIntegrateStitch(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)6734 MOS_STATUS CodechalVdencHevcStateG12::SetRegionsHuCPakIntegrateStitch(
6735 PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
6736 {
6737 CODECHAL_ENCODE_FUNCTION_ENTER;
6738
6739 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6740
6741 int32_t currentPass = GetCurrentPass();
6742
6743 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
6744
6745 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6746 CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6747
6748 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
6749
6750 // Add Virtual addr
6751 virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
6752 virtualAddrParams->regionParams[0].dwOffset = 0;
6753 virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output
6754 virtualAddrParams->regionParams[1].isWritable = true;
6755 virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream
6756 virtualAddrParams->regionParams[4].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6757 virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command
6758 virtualAddrParams->regionParams[5].dwOffset = MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
6759 virtualAddrParams->regionParams[5].isWritable = true;
6760 virtualAddrParams->regionParams[6].presRegion = &m_vdencBrcHistoryBuffer; // Region 6 History Buffer (Input/Output)
6761 virtualAddrParams->regionParams[6].isWritable = true;
6762 virtualAddrParams->regionParams[7].presRegion = &m_thirdLevelBatchBuffer.OsResource; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command
6763 virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation
6764 virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data
6765 virtualAddrParams->regionParams[9].isWritable = true;
6766 virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
6767 virtualAddrParams->regionParams[10].isWritable = true;
6768 virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
6769 virtualAddrParams->regionParams[15].dwOffset = 0;
6770 return eStatus;
6771 }
6772
SetDmemHuCPakIntegrateStitch(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6773 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCPakIntegrateStitch(
6774 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
6775 {
6776 CODECHAL_ENCODE_FUNCTION_ENTER;
6777
6778 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6779
6780 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6781 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6782 lockFlagsWriteOnly.WriteOnly = true;
6783
6784 int32_t currentPass = GetCurrentPass();
6785
6786 HucPakStitchDmemVdencG12 *hucPakStitchDmem = (HucPakStitchDmemVdencG12 *)m_osInterface->pfnLockResource(
6787 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6788 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6789
6790 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG12));
6791
6792 // reset all the offsets to -1
6793 uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) +
6794 sizeof(hucPakStitchDmem->VDENCSTAT_offset) +
6795 sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) +
6796 sizeof(hucPakStitchDmem->HEVC_Streamout_offset) +
6797 sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) +
6798 sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset);
6799 MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF);
6800
6801 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
6802 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6803 CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4
6804 CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4
6805 uint16_t numTiles = numTileRows * numTileColumns;
6806 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
6807 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6808 CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6809
6810 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
6811 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
6812 hucPakStitchDmem->TotalNumberOfPAKs = 0;
6813 hucPakStitchDmem->Codec = 2; //HEVC DP CQP
6814 hucPakStitchDmem->MAXPass = 1;
6815 hucPakStitchDmem->CurrentPass = 1;
6816 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6817 hucPakStitchDmem->CabacZeroWordFlag = false;
6818 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
6819 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
6820 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
6821 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6822 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6823 hucPakStitchDmem->OffsetInCommandBuffer = tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
6824 hucPakStitchDmem->LastTileBS_StartInBytes = (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
6825
6826 hucPakStitchDmem->StitchEnable = true;
6827 hucPakStitchDmem->StitchCommandOffset = 0;
6828 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6829
6830 //Set the kernel output offsets
6831 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6832 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = 0xFFFFFFFF;
6833 hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF;
6834 hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF;
6835
6836 for (auto i = 0; i < m_numPipe; i++)
6837 {
6838 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6839
6840 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6841 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6842 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) +
6843 m_hevcTileStatsOffset.uiTileSizeRecord;
6844 }
6845
6846 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6847
6848 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6849 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6850 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE);
6851 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6852
6853 return eStatus;
6854 }
6855
SetDmemHuCPakIntegrate(PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)6856 MOS_STATUS CodechalVdencHevcStateG12::SetDmemHuCPakIntegrate(
6857 PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams)
6858 {
6859 CODECHAL_ENCODE_FUNCTION_ENTER;
6860
6861 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6862
6863 MOS_LOCK_PARAMS lockFlagsWriteOnly;
6864 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
6865 lockFlagsWriteOnly.WriteOnly = true;
6866
6867 int32_t currentPass = GetCurrentPass();
6868 if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES)
6869 {
6870 eStatus = MOS_STATUS_INVALID_PARAMETER;
6871 return eStatus;
6872 }
6873
6874 HucPakStitchDmemVdencG12* hucPakStitchDmem = (HucPakStitchDmemVdencG12*)m_osInterface->pfnLockResource(
6875 m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly);
6876 CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
6877 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemVdencG12));
6878
6879 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
6880 CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
6881
6882 // Reset all the offsets to be shared in the huc dmem (6*5 DW's)
6883 MOS_FillMemory(hucPakStitchDmem, 120, 0xFF);
6884
6885 uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
6886 uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
6887 uint16_t numTiles = numTileRows * numTileColumns;
6888 uint16_t numTilesPerPipe = m_numTiles / m_numPipe;
6889
6890 hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE;
6891 // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
6892 hucPakStitchDmem->OffsetInCommandBuffer = (m_numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8;
6893 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth;
6894 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight;
6895 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
6896 hucPakStitchDmem->Codec = 2; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
6897 hucPakStitchDmem->MAXPass = m_brcEnabled ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1;
6898 hucPakStitchDmem->CurrentPass = (uint8_t) currentPass + 1; // Current BRC pass [1..MAXPass]
6899 hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
6900 hucPakStitchDmem->CabacZeroWordFlag = false;
6901 hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8
6902 hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8
6903 hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc;
6904 hucPakStitchDmem->LastTileBS_StartInBytes = (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
6905 hucPakStitchDmem->PIC_STATE_StartInBytes = (uint16_t)m_picStateCmdStartInBytes;
6906 CODECHAL_ENCODE_VERBOSEMESSAGE("last tile offset = 0x%x, LastTileBS_StartInBytes =0x%x, (tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE), hucPakStitchDmem->LastTileBS_StartInBytes");
6907 if(m_enableTileStitchByHW)
6908 {
6909 hucPakStitchDmem->StitchEnable = true;
6910 hucPakStitchDmem->StitchCommandOffset = 0;
6911 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
6912 }
6913
6914 if (m_numPipe > 1)
6915 {
6916 //Set the kernel output offsets
6917 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = m_hevcFrameStatsOffset.uiHevcPakStatistics;
6918 hucPakStitchDmem->HEVC_Streamout_offset[0] = m_hevcFrameStatsOffset.uiHevcSliceStreamout;
6919 hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord;
6920 hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics;
6921
6922 // Calculate number of slices that execute on a single pipe
6923 for (auto tileRow = 0; tileRow < numTileRows; tileRow++)
6924 {
6925 for (auto tileCol = 0; tileCol < numTileColumns; tileCol++)
6926 {
6927 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
6928 uint16_t slcCount, idx, sliceNumInTile = 0;
6929
6930 idx = tileRow * numTileColumns + tileCol;
6931 for (slcCount = 0; slcCount < m_numSlices; slcCount++)
6932 {
6933 bool lastSliceInTile = false, sliceInTile = false;
6934
6935 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
6936 &tileParams[idx],
6937 &sliceInTile,
6938 &lastSliceInTile));
6939
6940 if (!sliceInTile)
6941 {
6942 continue;
6943 }
6944
6945 sliceNumInTile++;
6946 } // end of slice
6947 if (0 == sliceNumInTile)
6948 {
6949 // One tile must have at least one slice
6950 CODECHAL_ENCODE_ASSERT(false);
6951 eStatus = MOS_STATUS_INVALID_PARAMETER;
6952 break;
6953 }
6954
6955 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
6956 {
6957 CODECHAL_ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
6958 return MOS_STATUS_INVALID_PARAMETER;
6959 }
6960 // Set the number of slices per pipe in the Dmem structure
6961 hucPakStitchDmem->NumSlices[tileCol] += sliceNumInTile;
6962 }
6963 }
6964
6965 for (auto i = 0; i < m_numPipe; i++)
6966 {
6967 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe;
6968 hucPakStitchDmem->NumSlices[i] = numTilesPerPipe; // Assuming 1 slice/ tile. To do: change this later.
6969
6970 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
6971 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
6972 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + m_hevcTileStatsOffset.uiTileSizeRecord;
6973 hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) + m_hevcTileStatsOffset.uiHevcPakStatistics;
6974 hucPakStitchDmem->VDENCSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiVdencStatistics) + m_hevcTileStatsOffset.uiVdencStatistics;
6975 hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + m_hevcTileStatsOffset.uiHevcSliceStreamout;
6976 }
6977 }
6978 else
6979 {
6980 hucPakStitchDmem->NumTiles[0] = numTiles;
6981 hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe;
6982
6983 // non-scalable mode, only VDEnc statistics need to be aggregated
6984 hucPakStitchDmem->VDENCSTAT_offset[0] = m_hevcFrameStatsOffset.uiVdencStatistics;
6985 hucPakStitchDmem->VDENCSTAT_offset[1] = m_hevcTileStatsOffset.uiVdencStatistics;
6986 }
6987
6988 m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]));
6989
6990 MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS));
6991 dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]);
6992 dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE);
6993 dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
6994
6995 return eStatus;
6996 }
6997
HucPakIntegrate(PMOS_COMMAND_BUFFER cmdBuffer)6998 MOS_STATUS CodechalVdencHevcStateG12::HucPakIntegrate(
6999 PMOS_COMMAND_BUFFER cmdBuffer)
7000 {
7001 CODECHAL_ENCODE_FUNCTION_ENTER;
7002
7003 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7004
7005 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7006
7007 CODECHAL_ENCODE_CHK_COND_RETURN(
7008 (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
7009 "ERROR - vdbox index exceed the maximum");
7010
7011 auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
7012
7013 // load kernel from WOPCM into L2 storage RAM
7014 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
7015 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
7016 imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
7017
7018 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
7019
7020 // pipe mode select
7021 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
7022 pipeModeSelectParams.Mode = m_mode;
7023 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
7024
7025 // DMEM set
7026 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
7027 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams));
7028 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
7029
7030 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
7031 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams));
7032 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
7033
7034 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
7035 MHW_MI_STORE_DATA_PARAMS storeDataParams;
7036 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7037 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
7038 storeDataParams.dwResourceOffset = 0;
7039 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
7040 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
7041
7042 // Store HUC_STATUS2 register
7043 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
7044 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7045 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
7046 storeRegParams.dwOffset = sizeof(uint32_t);
7047 storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
7048 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
7049
7050 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
7051
7052 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
7053
7054 // wait Huc completion (use HEVC bit for now)
7055 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
7056 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
7057 vdPipeFlushParams.Flags.bFlushHEVC = 1;
7058 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
7059 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
7060
7061 // Flush the engine to ensure memory written out
7062 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
7063 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
7064 flushDwParams.bVideoPipelineCacheInvalidate = true;
7065 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
7066
7067 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
7068
7069 uint32_t baseOffset =
7070 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
7071
7072 // Write HUC_STATUS mask
7073 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7074 storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
7075 storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
7076 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
7077 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
7078 cmdBuffer,
7079 &storeDataParams));
7080
7081 // store HUC_STATUS register
7082 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7083 storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
7084 storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
7085 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
7086 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
7087 cmdBuffer,
7088 &storeRegParams));
7089
7090 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
7091 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
7092
7093 return eStatus;
7094 }
7095
HucPakIntegrateStitch(PMOS_COMMAND_BUFFER cmdBuffer)7096 MOS_STATUS CodechalVdencHevcStateG12::HucPakIntegrateStitch(
7097 PMOS_COMMAND_BUFFER cmdBuffer)
7098 {
7099 CODECHAL_ENCODE_FUNCTION_ENTER;
7100
7101 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7102
7103 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7104
7105 CODECHAL_ENCODE_CHK_COND_RETURN(
7106 (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()),
7107 "ERROR - vdbox index exceed the maximum");
7108
7109 auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex);
7110
7111 // load kernel from WOPCM into L2 storage RAM
7112 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
7113 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
7114 imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR;
7115
7116 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams));
7117
7118 // pipe mode select
7119 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
7120 pipeModeSelectParams.Mode = m_mode;
7121 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
7122
7123 // DMEM set
7124 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
7125 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateStitch(&dmemParams));
7126 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
7127
7128 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
7129 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateStitch(&virtualAddrParams));
7130 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
7131
7132 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
7133 MHW_MI_STORE_DATA_PARAMS storeDataParams;
7134 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7135 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
7136 storeDataParams.dwResourceOffset = 0;
7137 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask();
7138 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
7139
7140 // Store HUC_STATUS2 register
7141 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
7142 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7143 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
7144 storeRegParams.dwOffset = sizeof(uint32_t);
7145 storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset;
7146 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
7147
7148 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
7149
7150 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true));
7151
7152 // wait Huc completion (use HEVC bit for now)
7153 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
7154 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
7155 vdPipeFlushParams.Flags.bFlushHEVC = 1;
7156 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
7157 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
7158
7159 // Flush the engine to ensure memory written out
7160 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
7161 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
7162 flushDwParams.bVideoPipelineCacheInvalidate = true;
7163 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
7164
7165 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
7166
7167 uint32_t baseOffset =
7168 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
7169
7170 // Write HUC_STATUS mask
7171 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
7172 storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer;
7173 storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset;
7174 storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask();
7175 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
7176 cmdBuffer,
7177 &storeDataParams));
7178
7179 // store HUC_STATUS register
7180 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
7181 storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
7182 storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
7183 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
7184 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
7185 cmdBuffer,
7186 &storeRegParams));
7187
7188 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
7189 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
7190
7191 return eStatus;
7192 }
7193
CreateMhwParams()7194 void CodechalVdencHevcStateG12::CreateMhwParams()
7195 {
7196 m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G12);
7197 m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12);
7198 m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
7199 }
7200
CalculatePictureStateCommandSize()7201 MOS_STATUS CodechalVdencHevcStateG12::CalculatePictureStateCommandSize()
7202 {
7203 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7204
7205 CODECHAL_ENCODE_FUNCTION_ENTER;
7206
7207 MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
7208 CODECHAL_ENCODE_CHK_STATUS_RETURN(
7209 m_hwInterface->GetHxxStateCommandSize(
7210 CODECHAL_ENCODE_MODE_HEVC,
7211 &m_defaultPictureStatesSize,
7212 &m_defaultPicturePatchListSize,
7213 &stateCmdSizeParams));
7214
7215 return eStatus;
7216 }
7217
AddHcpPipeBufAddrCmd(PMOS_COMMAND_BUFFER cmdBuffer)7218 MOS_STATUS CodechalVdencHevcStateG12::AddHcpPipeBufAddrCmd(
7219 PMOS_COMMAND_BUFFER cmdBuffer)
7220 {
7221 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7222
7223 CODECHAL_ENCODE_FUNCTION_ENTER;
7224
7225 #ifdef _MMC_SUPPORTED
7226 m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams);
7227 // Recon P010v MMC state set from RC for compression write
7228 // Reference P010v MMC state set from MC for compression read
7229 if (m_reconSurface.Format == Format_P010 && m_pipeBufAddrParams && MmcEnable(m_pipeBufAddrParams->PreDeblockSurfMmcState))
7230 {
7231 auto paramsG12 = dynamic_cast<PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12>(m_pipeBufAddrParams);
7232 MHW_CHK_NULL_RETURN(paramsG12);
7233 paramsG12->bSpecificReferencedMmcRequired = true;
7234 paramsG12->ReferencesMmcState = m_pipeBufAddrParams->PreDeblockSurfMmcState;
7235
7236 m_pipeBufAddrParams->PreDeblockSurfMmcState = MOS_MEMCOMP_RC;
7237 }
7238 #endif
7239 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams));
7240
7241 return eStatus;
7242 }
7243
SetTileData(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 * tileCodingParams)7244 MOS_STATUS CodechalVdencHevcStateG12::SetTileData(
7245 MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12* tileCodingParams)
7246 {
7247 CODECHAL_ENCODE_FUNCTION_ENTER;
7248
7249 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7250
7251 if (!m_hevcPicParams->tiles_enabled_flag)
7252 {
7253 return eStatus;
7254 }
7255
7256 uint32_t colBd[100] = { 0 };
7257 uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7258 for (uint32_t i = 0; i < numTileColumns; i++)
7259 {
7260 colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i];
7261 }
7262
7263 uint32_t rowBd[100] = { 0 };
7264 uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7265 for (uint32_t i = 0; i < numTileRows; i++)
7266 {
7267 rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i];
7268 }
7269
7270 m_numTiles = numTileRows * numTileColumns;
7271 if (m_numTiles > CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_VDENC_MIN_TILE_WIDTH_SIZE) *
7272 CODECHAL_GET_HEIGHT_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_VDENC_MIN_TILE_HEIGHT_SIZE))
7273 {
7274 return MOS_STATUS_INVALID_PARAMETER;
7275 }
7276 m_numTileRows = numTileRows;
7277
7278 uint32_t const numCuRecordTab[] = { 1, 4, 16, 64 }; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64
7279 uint32_t numCuRecord = numCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)];
7280 uint32_t maxBytePerLCU = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
7281 maxBytePerLCU = maxBytePerLCU * maxBytePerLCU; // number of pixels per LCU
7282 maxBytePerLCU = maxBytePerLCU * 3 / (m_is10BitHevc ? 1 : 2); //assume 4:2:0 format
7283 uint32_t bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0;
7284 int32_t frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1;
7285 int32_t frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1;
7286 int32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7287 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
7288 uint32_t streamInWidthinLCU = MOS_ROUNDUP_DIVIDE((frameWidthInMinCb << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
7289 uint32_t numLcuInPic = 0;
7290 uint32_t tileStartLCUAddr = 0;
7291
7292 for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
7293 {
7294 for (uint32_t j = 0; j < numTileColumns; j++)
7295 {
7296 numLcuInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7297 }
7298 }
7299
7300 uint32_t numSliceInTile = 0;
7301 uint64_t activeBitstreamSize = (uint64_t)m_encodeParams.dwBitstreamSize;
7302 // There would be padding at the end of last tile in CBR, reserve dedicated part in the BS buf
7303 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
7304 {
7305 // Assume max padding num < target frame size derived from target bit rate and frame rate
7306 uint32_t actualFrameRate = m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator;
7307 uint64_t reservedPart = (uint64_t)m_hevcSeqParams->TargetBitRate / 8 / (uint64_t)actualFrameRate * 1024;
7308
7309 if (reservedPart > activeBitstreamSize)
7310 {
7311 CODECHAL_ENCODE_ASSERTMESSAGE("Frame size cal from target Bit rate is larger than BS buf! Issues in CBR paras!");
7312 return MOS_STATUS_INVALID_PARAMETER;
7313 }
7314
7315 // Capping the reserved part to 1/10 of bs buf size
7316 if (reservedPart > activeBitstreamSize / 10)
7317 {
7318 reservedPart = activeBitstreamSize / 10;
7319 }
7320
7321 activeBitstreamSize -= reservedPart;
7322 }
7323
7324 for (uint32_t numLcusInTiles = 0, i = 0; i < numTileRows; i++)
7325 {
7326 for (uint32_t j = 0; j < numTileColumns; j++)
7327 {
7328 uint32_t idx = i * numTileColumns + j;
7329 uint32_t numLcuInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j];
7330
7331 tileCodingParams[idx].TileStartLCUX = colBd[j];
7332 tileCodingParams[idx].TileStartLCUY = rowBd[i];
7333
7334 tileCodingParams[idx].TileColumnStoreSelect = j % 2;
7335 tileCodingParams[idx].TileRowStoreSelect = i % 2;
7336
7337 if (j != numTileColumns - 1)
7338 {
7339 tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1;
7340 tileCodingParams[idx].IsLastTileofRow = false;
7341 }
7342 else
7343 {
7344 tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1;
7345 tileCodingParams[idx].IsLastTileofRow = true;
7346
7347 }
7348
7349 if (i != numTileRows - 1)
7350 {
7351 tileCodingParams[idx].IsLastTileofColumn = false;
7352 tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1;
7353 }
7354 else
7355 {
7356 tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1;
7357 tileCodingParams[idx].IsLastTileofColumn = true;
7358 }
7359
7360 tileCodingParams[idx].NumOfTilesInFrame = m_numTiles;
7361 tileCodingParams[idx].NumOfTileColumnsInFrame = numTileColumns;
7362 tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()),
7363 CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7364 tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1;
7365
7366 tileCodingParams[idx].PakTileStatisticsOffset = 9 * idx;
7367 tileCodingParams[idx].TileSizeStreamoutOffset = idx;
7368 tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0;
7369 tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource;
7370 tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
7371 tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile;
7372 tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset;
7373 tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset;
7374 tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset;
7375
7376 uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
7377 uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
7378
7379 //StreamIn data is 4 Cachelines per LCU
7380 tileCodingParams[idx].TileStreaminOffset = 4 * (tileCodingParams[idx].TileStartLCUY * streamInWidthinLCU + tileCodingParams[idx].TileStartLCUX * tileHeightInLCU);
7381 tileCodingParams[idx].SliceSizeStreamoutOffset = tileStartLCUAddr;
7382 tileStartLCUAddr += (tileWidthInLCU * tileHeightInLCU);
7383
7384 cuLevelStreamoutOffset += (tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16 / CODECHAL_CACHELINE_SIZE;
7385 sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
7386 saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE;
7387
7388 uint64_t totalSizeTemp = (uint64_t)activeBitstreamSize * (uint64_t)numLcuInTile;
7389 uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)numLcuInPic) + ((totalSizeTemp % (uint64_t)numLcuInPic) ? 1 : 0);
7390 bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
7391
7392 numLcusInTiles += numLcuInTile;
7393
7394 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
7395 {
7396 bool lastSliceInTile = false, sliceInTile = false;
7397 CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount,
7398 &tileCodingParams[idx],
7399 &sliceInTile,
7400 &lastSliceInTile));
7401 numSliceInTile += (sliceInTile ? 1 : 0);
7402 }
7403 }
7404
7405 // same row store buffer for different tile rows.
7406 saoRowstoreOffset = 0;
7407 sseRowstoreOffset = 0;
7408 }
7409
7410 return eStatus;
7411 }
7412
IsSliceInTile(uint32_t sliceNumber,PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,bool * sliceInTile,bool * lastSliceInTile)7413 MOS_STATUS CodechalVdencHevcStateG12::IsSliceInTile(
7414 uint32_t sliceNumber,
7415 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile,
7416 bool *sliceInTile,
7417 bool *lastSliceInTile)
7418 {
7419 CODECHAL_ENCODE_FUNCTION_ENTER;
7420
7421 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7422
7423 CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile);
7424 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile);
7425 CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile);
7426
7427 uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3;
7428 uint32_t residual = (1 << shift) - 1;
7429 uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift;
7430 uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift;
7431
7432 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber];
7433 uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address;
7434 uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU;
7435 uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU;
7436
7437 uint32_t tileColumnWidth = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift;
7438 uint32_t tileRowHeight = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift;
7439 if (sliceLCUx < currentTile->TileStartLCUX ||
7440 sliceLCUy < currentTile->TileStartLCUY ||
7441 sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
7442 sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
7443 )
7444 {
7445 // slice start is not in the tile boundary
7446 *lastSliceInTile = *sliceInTile = false;
7447 return eStatus;
7448 }
7449
7450 sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tileColumnWidth;
7451 sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tileColumnWidth;
7452
7453 if (sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth)
7454 {
7455 sliceLCUx -= tileColumnWidth;
7456 sliceLCUy++;
7457 }
7458
7459 if (sliceLCUx < currentTile->TileStartLCUX ||
7460 sliceLCUy < currentTile->TileStartLCUY ||
7461 sliceLCUx >= currentTile->TileStartLCUX + tileColumnWidth ||
7462 sliceLCUy >= currentTile->TileStartLCUY + tileRowHeight
7463 )
7464 {
7465 // last LCU of the slice is out of the tile boundary
7466 *lastSliceInTile = *sliceInTile = false;
7467 return eStatus;
7468 }
7469
7470 *sliceInTile = true;
7471
7472 sliceLCUx++;
7473 sliceLCUy++;
7474
7475 // the end of slice is at the boundary of tile
7476 *lastSliceInTile = (
7477 sliceLCUx == currentTile->TileStartLCUX + tileColumnWidth &&
7478 sliceLCUy == currentTile->TileStartLCUY + tileRowHeight);
7479
7480 return eStatus;
7481 }
7482
InitMmcState()7483 MOS_STATUS CodechalVdencHevcStateG12::InitMmcState()
7484 {
7485 CODECHAL_ENCODE_FUNCTION_ENTER;
7486 #ifdef _MMC_SUPPORTED
7487 m_mmcState = MOS_New(CodechalMmcEncodeHevcG12, m_hwInterface, this);
7488 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
7489 #endif
7490 return MOS_STATUS_SUCCESS;
7491 }
7492
7493 #ifdef _ENCODE_VDENC_RESERVED
InitReserveState(CodechalSetting * settings)7494 MOS_STATUS CodechalVdencHevcStateG12::InitReserveState(CodechalSetting * settings)
7495 {
7496 CODECHAL_ENCODE_FUNCTION_ENTER;
7497
7498 m_rsvdState = MOS_New(CodechalVdencHevcG12Rsvd, m_hwInterface, this);
7499 return MOS_STATUS_SUCCESS;
7500 }
7501 #endif
7502
CalculateCommandBufferSize()7503 uint32_t CodechalVdencHevcStateG12::CalculateCommandBufferSize()
7504 {
7505 CODECHAL_ENCODE_FUNCTION_ENTER;
7506
7507 // To be refined later, differentiate BRC and CQP
7508 uint32_t commandBufferSize =
7509 m_pictureStatesSize +
7510 m_extraPictureStatesSize +
7511 (m_sliceStatesSize * m_numSlices) +
7512 m_hucCommandsSize * 5;
7513
7514 if (m_singleTaskPhaseSupported)
7515 {
7516 commandBufferSize *= (m_numPasses + 1);
7517 }
7518
7519 if (m_osInterface->bUsesPatchList && m_hevcPicParams->tiles_enabled_flag)
7520 {
7521 commandBufferSize += (m_tileLevelBatchSize * m_numTiles * CODECHAL_VDENC_BRC_NUM_OF_PASSES);
7522 }
7523
7524 // 4K align since allocation is in chunks of 4K bytes.
7525 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, 0x1000);
7526
7527 return commandBufferSize;
7528 }
7529
VerifyCommandBufferSize()7530 MOS_STATUS CodechalVdencHevcStateG12::VerifyCommandBufferSize()
7531 {
7532 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7533
7534 CODECHAL_ENCODE_FUNCTION_ENTER;
7535
7536 if (UseRenderCommandBuffer() || m_numPipe == 1)
7537 {
7538 // legacy mode & resize CommandBuffer Size for every BRC pass
7539 if (!m_singleTaskPhaseSupported)
7540 {
7541 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
7542 }
7543 return eStatus;
7544 }
7545
7546 // virtual engine
7547 uint32_t requestedSize =
7548 m_pictureStatesSize +
7549 m_extraPictureStatesSize +
7550 (m_sliceStatesSize * m_numSlices);
7551
7552 requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize);
7553
7554 // Running in the multiple VDBOX mode
7555 int currentPipe = GetCurrentPipe();
7556 if (currentPipe < 0 || currentPipe >= m_numPipe)
7557 {
7558 eStatus = MOS_STATUS_INVALID_PARAMETER;
7559 return eStatus;
7560 }
7561 int currentPass = GetCurrentPass();
7562 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7563 {
7564 eStatus = MOS_STATUS_INVALID_PARAMETER;
7565 return eStatus;
7566 }
7567
7568 if (IsFirstPipe() && m_osInterface->bUsesPatchList)
7569 {
7570 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
7571 }
7572
7573 PMOS_COMMAND_BUFFER pCmdBuffer;
7574 if (m_osInterface->phasedSubmission)
7575 {
7576 m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
7577 return eStatus;
7578 }
7579 else
7580 {
7581 pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
7582 }
7583
7584 if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) ||
7585 m_sizeOfVeBatchBuffer < requestedSize)
7586 {
7587 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
7588
7589 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7590 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
7591 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
7592 allocParamsForBufferLinear.Format = Format_Buffer;
7593 allocParamsForBufferLinear.dwBytes = requestedSize;
7594 allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
7595
7596 if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource))
7597 {
7598 if (pCmdBuffer->pCmdBase)
7599 {
7600 m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource);
7601 }
7602 m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource);
7603 }
7604
7605 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
7606 m_osInterface,
7607 &allocParamsForBufferLinear,
7608 &pCmdBuffer->OsResource));
7609
7610 m_sizeOfVeBatchBuffer = requestedSize;
7611 }
7612
7613 if (pCmdBuffer->pCmdBase == nullptr)
7614 {
7615 MOS_LOCK_PARAMS lockParams;
7616 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
7617 lockParams.WriteOnly = true;
7618 pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams);
7619 pCmdBuffer->iRemaining = m_sizeOfVeBatchBuffer;
7620 pCmdBuffer->iOffset = 0;
7621 pCmdBuffer->is1stLvlBB = true;
7622
7623 if (pCmdBuffer->pCmdBase == nullptr)
7624 {
7625 eStatus = MOS_STATUS_NULL_POINTER;
7626 return eStatus;
7627 }
7628 }
7629
7630 return eStatus;
7631 }
7632
GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)7633 MOS_STATUS CodechalVdencHevcStateG12::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
7634 {
7635 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7636
7637 CODECHAL_ENCODE_FUNCTION_ENTER;
7638
7639 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7640 CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
7641
7642 if (UseRenderCommandBuffer() || m_numPipe == 1)
7643 {
7644 // legacy mode
7645 m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr;
7646 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
7647 return eStatus;
7648 }
7649
7650 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
7651
7652 int currentPipe = GetCurrentPipe();
7653 if (currentPipe < 0 || currentPipe >= m_numPipe)
7654 {
7655 eStatus = MOS_STATUS_INVALID_PARAMETER;
7656 return eStatus;
7657 }
7658 int currentPass = GetCurrentPass();
7659 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7660 {
7661 eStatus = MOS_STATUS_INVALID_PARAMETER;
7662 return eStatus;
7663 }
7664
7665 if (m_osInterface->phasedSubmission)
7666 {
7667 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1));
7668
7669 CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
7670 if (IsLastPipe())
7671 {
7672 cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
7673 }
7674 }
7675 else
7676 {
7677 *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass];
7678 }
7679
7680 if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0)
7681 {
7682 // Insert CP Prolog
7683 CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode");
7684 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer));
7685 }
7686 return eStatus;
7687 }
7688
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)7689 MOS_STATUS CodechalVdencHevcStateG12::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer)
7690 {
7691 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7692
7693 CODECHAL_ENCODE_FUNCTION_ENTER;
7694
7695 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7696
7697 if (UseRenderCommandBuffer() || m_numPipe == 1)
7698 {
7699 // legacy mode
7700 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
7701 return eStatus;
7702 }
7703
7704 int currentPipe = GetCurrentPipe();
7705 if (currentPipe < 0 || currentPipe >= m_numPipe)
7706 {
7707 eStatus = MOS_STATUS_INVALID_PARAMETER;
7708 return eStatus;
7709 }
7710 int currentPass = GetCurrentPass();
7711 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7712 {
7713 eStatus = MOS_STATUS_INVALID_PARAMETER;
7714 return eStatus;
7715 }
7716
7717 if (m_osInterface->phasedSubmission)
7718 {
7719 m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, currentPipe + 1);
7720 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
7721 }
7722 else
7723 {
7724 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
7725 m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer;
7726 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
7727 }
7728
7729 return eStatus;
7730 }
7731
SubmitCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,bool bNullRendering)7732 MOS_STATUS CodechalVdencHevcStateG12::SubmitCommandBuffer(
7733 PMOS_COMMAND_BUFFER cmdBuffer,
7734 bool bNullRendering)
7735 {
7736 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7737
7738 CODECHAL_ENCODE_FUNCTION_ENTER;
7739
7740 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7741
7742 if (IsLastPass())
7743 {
7744 HalOcaInterface::On1stLevelBBEnd(*cmdBuffer, *m_osInterface);
7745 }
7746
7747 if (UseRenderCommandBuffer() || m_numPipe == 1)
7748 {
7749 // legacy mode
7750 if (!UseRenderCommandBuffer()) // Set VE Hints for video contexts only
7751 {
7752 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
7753 }
7754 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, bNullRendering));
7755 return eStatus;
7756 }
7757
7758 bool cmdBufferReadyForSubmit = IsLastPipe();
7759
7760 // In STF, Hold the command buffer submission till last pass
7761 if (m_singleTaskPhaseSupported)
7762 {
7763 cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass();
7764 }
7765
7766 if(!cmdBufferReadyForSubmit)
7767 {
7768 return eStatus;
7769 }
7770
7771 int currentPass = GetCurrentPass();
7772 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
7773 {
7774 eStatus = MOS_STATUS_INVALID_PARAMETER;
7775 return eStatus;
7776 }
7777
7778 if (m_osInterface->phasedSubmission)
7779 {
7780 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
7781 }
7782 else
7783 {
7784 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
7785
7786 for (uint32_t i = 0; i < m_numPipe; i++)
7787 {
7788 PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex];
7789
7790 if(cmdBuffer->pCmdBase)
7791 {
7792 m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
7793 }
7794
7795 cmdBuffer->pCmdBase = 0;
7796 cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
7797 }
7798 m_sizeOfVeBatchBuffer = 0;
7799
7800 if(eStatus == MOS_STATUS_SUCCESS)
7801 {
7802 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
7803 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, bNullRendering));
7804 }
7805 }
7806
7807 return eStatus;
7808 }
7809
SendPrologWithFrameTracking(PMOS_COMMAND_BUFFER cmdBuffer,bool frameTrackingRequested,MHW_MI_MMIOREGISTERS * mmioRegister)7810 MOS_STATUS CodechalVdencHevcStateG12::SendPrologWithFrameTracking(
7811 PMOS_COMMAND_BUFFER cmdBuffer,
7812 bool frameTrackingRequested,
7813 MHW_MI_MMIOREGISTERS *mmioRegister)
7814 {
7815 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7816
7817 CODECHAL_ENCODE_FUNCTION_ENTER;
7818
7819 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
7820
7821 CODECHAL_ENCODE_CHK_NULL_RETURN(m_hevcSeqParams);
7822
7823 // Set flag bIsMdfLoad in remote gaming scenario to boost GPU frequency for low latency
7824 cmdBuffer->Attributes.bFrequencyBoost = (m_hevcSeqParams->ScenarioInfo == ESCENARIO_REMOTEGAMING);
7825
7826 MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
7827
7828 if (UseRenderCommandBuffer())
7829 {
7830 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
7831 return eStatus;
7832 }
7833
7834 #ifdef _MMC_SUPPORTED
7835 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
7836 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
7837 #endif
7838
7839 if (!IsLastPipe())
7840 {
7841 return eStatus;
7842 }
7843
7844 PMOS_COMMAND_BUFFER commandBufferInUse;
7845 if (m_realCmdBuffer.pCmdBase)
7846 {
7847 commandBufferInUse = &m_realCmdBuffer;
7848 }
7849 else
7850 if (cmdBuffer && cmdBuffer->pCmdBase)
7851 {
7852 commandBufferInUse = cmdBuffer;
7853 }
7854 else
7855 {
7856 eStatus = MOS_STATUS_INVALID_PARAMETER;
7857 return eStatus;
7858 }
7859
7860 // initialize command buffer attributes
7861 commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
7862 commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
7863 commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
7864 commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
7865 commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
7866
7867 if (frameTrackingRequested && m_frameTrackingEnabled)
7868 {
7869 commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
7870 commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
7871 &m_encodeStatusBuf.resStatusBuffer;
7872 commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
7873 // Set media frame tracking address offset(the offset from the encoder status buffer page)
7874 commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
7875 }
7876
7877 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
7878 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
7879 genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
7880 genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
7881 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
7882 genericPrologParams.dwStoreDataValue = m_storeData - 1;
7883
7884 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
7885
7886 return eStatus;
7887 }
7888
SetSliceStructs()7889 MOS_STATUS CodechalVdencHevcStateG12::SetSliceStructs()
7890 {
7891 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7892 eStatus = CodechalEncodeHevcBase::SetSliceStructs();
7893 CODECHAL_ENCODE_CHK_COND_RETURN((m_lookaheadPass && !m_lowDelay), "RA B frame is not expected in lookahead pass.");
7894 m_numPassesInOnePipe = m_numPasses;
7895 m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
7896 return eStatus;
7897 }
7898
AllocateTileStatistics()7899 MOS_STATUS CodechalVdencHevcStateG12::AllocateTileStatistics()
7900 {
7901 CODECHAL_ENCODE_FUNCTION_ENTER;
7902
7903 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7904
7905 if (!m_hevcPicParams->tiles_enabled_flag)
7906 {
7907 return eStatus;
7908 }
7909
7910 auto num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1;
7911 auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1;
7912 auto num_tiles = num_tile_rows * num_tile_columns;
7913
7914 MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
7915 MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO));
7916 MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO));
7917
7918 MOS_LOCK_PARAMS lockFlagsWriteOnly;
7919 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
7920 lockFlagsWriteOnly.WriteOnly = true;
7921
7922 // Set the maximum size based on frame level statistics.
7923 m_hevcStatsSize.uiTileSizeRecord = CODECHAL_CACHELINE_SIZE;
7924 m_hevcStatsSize.uiHevcPakStatistics = m_sizeOfHcpPakFrameStats;
7925 m_hevcStatsSize.uiVdencStatistics = CODECHAL_HEVC_VDENC_STATS_SIZE;
7926 m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE;
7927
7928 // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer
7929 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
7930 m_hevcFrameStatsOffset.uiTileSizeRecord = 0; // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer
7931 m_hevcFrameStatsOffset.uiHevcPakStatistics = 0;
7932 m_hevcFrameStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE);
7933 m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE);
7934
7935 // Frame level statistics
7936 m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu), CODECHAL_PAGE_SIZE);
7937
7938 // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel
7939 if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource))
7940 {
7941 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
7942 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7943 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
7944 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
7945 allocParamsForBufferLinear.Format = Format_Buffer;
7946 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
7947 allocParamsForBufferLinear.pBufName = "GEN12 HCP Aggregated Frame Statistics Streamout Buffer";
7948
7949 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
7950 m_osInterface,
7951 &allocParamsForBufferLinear,
7952 &m_resHuCPakAggregatedFrameStatsBuffer.sResource));
7953 m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize;
7954
7955 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
7956 m_osInterface,
7957 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
7958 &lockFlagsWriteOnly);
7959
7960 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
7961 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
7962 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource);
7963 }
7964
7965 // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer
7966 // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions
7967 m_hevcTileStatsOffset.uiTileSizeRecord = 0; // TileReord is in a separated resource
7968 m_hevcTileStatsOffset.uiHevcPakStatistics = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer;
7969 m_hevcTileStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE);
7970 m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE);
7971 // Combined statistics size for all tiles
7972 m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * m_numLcu, CODECHAL_PAGE_SIZE);
7973
7974 // Tile size record size for all tiles
7975 m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles;
7976
7977 if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize)
7978 {
7979 if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource))
7980 {
7981 m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
7982 }
7983 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
7984 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
7985 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
7986 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
7987 allocParamsForBufferLinear.Format = Format_Buffer;
7988 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize;
7989 allocParamsForBufferLinear.pBufName = "GEN12 HCP Tile Level Statistics Streamout Buffer";
7990
7991 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
7992 m_osInterface,
7993 &allocParamsForBufferLinear,
7994 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource));
7995 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize;
7996
7997 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
7998 m_osInterface,
7999 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
8000 &lockFlagsWriteOnly);
8001 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8002
8003 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
8004 m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource);
8005 }
8006
8007 // Allocate the updated tile size buffer for PAK integration kernel
8008 if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize)
8009 {
8010 if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource))
8011 {
8012 m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource);
8013 }
8014 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8015 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8016 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8017 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8018 allocParamsForBufferLinear.Format = Format_Buffer;
8019 allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize;
8020 allocParamsForBufferLinear.pBufName = "Tile Record buffer";
8021
8022 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
8023 m_osInterface,
8024 &allocParamsForBufferLinear,
8025 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource),
8026 "Failed to create GEN12 Tile Record buffer");
8027
8028 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = allocParamsForBufferLinear.dwBytes;
8029 }
8030
8031 // Only needed when tile & BRC is enabled, but the size is not changing at frame level
8032 // Move to more properiate place later
8033 if (Mos_ResourceIsNull(&m_resBrcDataBuffer))
8034 {
8035 uint8_t* data;
8036 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
8037
8038 // Pak stitch DMEM
8039 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
8040 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
8041 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
8042 allocParamsForBufferLinear.Format = Format_Buffer;
8043 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE);
8044 allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
8045 auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
8046
8047 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
8048 {
8049 for (auto i = 0; i < numOfPasses; i++)
8050 {
8051 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
8052 m_osInterface->pfnAllocateResource(
8053 m_osInterface,
8054 &allocParamsForBufferLinear,
8055 &m_resHucPakStitchDmemBuffer[k][i]),
8056 "Failed to allocate PAK Stitch Dmem Buffer.");
8057
8058 MOS_LOCK_PARAMS lockFlagsWriteOnly;
8059 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8060 lockFlagsWriteOnly.WriteOnly = 1;
8061
8062 data = (uint8_t*)m_osInterface->pfnLockResource(
8063 m_osInterface,
8064 &m_resHucPakStitchDmemBuffer[k][i],
8065 &lockFlagsWriteOnly);
8066
8067 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8068
8069 MOS_ZeroMemory(
8070 data,
8071 allocParamsForBufferLinear.dwBytes);
8072
8073 m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucPakStitchDmemBuffer[k][i]);
8074 }
8075 }
8076
8077 // BRC Data Buffer
8078 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_numTiles * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
8079 allocParamsForBufferLinear.pBufName = "BRC Data Buffer";
8080
8081 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(
8082 m_osInterface->pfnAllocateResource(
8083 m_osInterface,
8084 &allocParamsForBufferLinear,
8085 &m_resBrcDataBuffer),
8086 "Failed to allocate BRC Data Buffer Buffer.");
8087
8088 MOS_LOCK_PARAMS lockFlags;
8089 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8090 lockFlags.WriteOnly = 1;
8091
8092 data = (uint8_t*)m_osInterface->pfnLockResource(
8093 m_osInterface,
8094 &m_resBrcDataBuffer,
8095 &lockFlags);
8096
8097 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8098
8099 MOS_ZeroMemory(
8100 data,
8101 allocParamsForBufferLinear.dwBytes);
8102
8103 m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer);
8104 }
8105
8106 return eStatus;
8107 }
8108
ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)8109 MOS_STATUS CodechalVdencHevcStateG12::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
8110 {
8111 CODECHAL_ENCODE_FUNCTION_ENTER;
8112
8113 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8114
8115 // encodeStatus is offset by 2 DWs in the resource
8116 uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset;
8117 for (auto i = 0; i < 6; i++) // 64 bit SSE values for luma/ chroma channels need to be copied
8118 {
8119 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
8120 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
8121 miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer;
8122 miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
8123 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
8124 miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t);
8125 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
8126 }
8127 return eStatus;
8128 }
8129
SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS & indObjBaseAddrParams)8130 void CodechalVdencHevcStateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
8131 {
8132 PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBbIndex];
8133 bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
8134
8135 MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
8136 indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
8137 indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
8138 indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
8139 indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
8140 indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
8141 indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
8142 indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr;
8143 indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0;
8144 indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0;
8145 }
8146
HuCLookaheadInit()8147 MOS_STATUS CodechalVdencHevcStateG12::HuCLookaheadInit()
8148 {
8149 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8150
8151 CODECHAL_ENCODE_FUNCTION_ENTER;
8152
8153 m_firstTaskInPhase = !m_singleTaskPhaseSupported;
8154 m_lastTaskInPhase = !m_singleTaskPhaseSupported;
8155
8156 // set DMEM
8157 uint32_t initVbvFullness = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit);
8158 MOS_LOCK_PARAMS lockFlagsWriteOnly;
8159 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8160 lockFlagsWriteOnly.WriteOnly = true;
8161
8162 auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
8163 m_osInterface, &m_vdencLaInitDmemBuffer, &lockFlagsWriteOnly);
8164 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
8165 MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
8166
8167 uint8_t downscaleRatioIndicator = 2; // 4x downscaling
8168 if (m_hevcPicParams->DownScaleRatio.fields.X16Minus1_X == 15 && m_hevcPicParams->DownScaleRatio.fields.X16Minus1_Y == 15)
8169 {
8170 downscaleRatioIndicator = 0; // no downscaling
8171 }
8172
8173 dmem->lookAheadFunc = 0;
8174 dmem->lengthAhead = m_lookaheadDepth;
8175 dmem->vbvBufferSize = m_hevcSeqParams->VBVBufferSizeInBit / m_averageFrameSize;
8176 dmem->vbvInitialFullness = initVbvFullness / m_averageFrameSize;
8177 dmem->statsRecords = m_numLaDataEntry;
8178 dmem->avgFrameSizeInByte = m_averageFrameSize >> 3;
8179 dmem->downscaleRatio = downscaleRatioIndicator;
8180 dmem->enc_frame_width = m_frameWidth;
8181 dmem->enc_frame_height = m_frameHeight;
8182 dmem->codec_type = 2;
8183 dmem->mbr_ratio = (m_hevcSeqParams->TargetBitRate > 0 && m_hevcSeqParams->MaxBitRate >= m_hevcSeqParams->TargetBitRate) ?
8184 m_hevcSeqParams->MaxBitRate * 100 / m_hevcSeqParams->TargetBitRate : 100;
8185
8186 if (m_hevcSeqParams->bLookAheadPhase)
8187 {
8188 if (m_hevcSeqParams->GopRefDist == 1)
8189 {
8190 dmem->PGop = 4;
8191 }
8192 else
8193 {
8194 dmem->BGop = m_hevcSeqParams->GopRefDist;
8195 dmem->maxGop = m_hevcSeqParams->GopPicSize;
8196 }
8197
8198 dmem->GopOpt = m_hevcSeqParams->GopFlags.fields.StrictGop ? 2 : m_hevcSeqParams->GopFlags.fields.ClosedGop;
8199 dmem->AGop = m_hevcSeqParams->GopFlags.fields.AdaptiveGop;
8200 if (m_hevcSeqParams->GopFlags.fields.AdaptiveGop)
8201 {
8202 dmem->AGop_Threshold = 30;
8203 }
8204
8205 dmem->maxGop = m_hevcSeqParams->MaxAdaptiveGopPicSize;
8206 dmem->minGop = m_hevcSeqParams->MinAdaptiveGopPicSize;
8207 dmem->adaptiveIDR = (uint8_t)m_lookaheadAdaptiveI;
8208 }
8209
8210 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaInitDmemBuffer);
8211
8212 // set HuC regions
8213 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8214 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8215 virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
8216 virtualAddrParams.regionParams[0].isWritable = true;
8217
8218 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
8219 if (m_swLaMode)
8220 {
8221 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
8222 m_debugInterface,
8223 m_swLaMode,
8224 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
8225 &m_vdencLaInitDmemBuffer,
8226 nullptr,
8227 &virtualAddrParams));
8228
8229 return eStatus;
8230 }
8231 #endif
8232
8233 MOS_COMMAND_BUFFER cmdBuffer;
8234 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8235
8236 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
8237 {
8238 // Send command buffer header at the beginning (OS dependent)
8239 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : 0;
8240 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8241 }
8242
8243 // load kernel from WOPCM into L2 storage RAM
8244 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8245 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8246 imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
8247
8248 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8249
8250 // pipe mode select
8251 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8252 pipeModeSelectParams.Mode = m_mode;
8253 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8254
8255 // set HuC DMEM param
8256 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8257 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8258 dmemParams.presHucDataSource = &m_vdencLaInitDmemBuffer;
8259 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8260 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8261 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8262 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
8263 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8264 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8265
8266 // wait Huc completion (use HEVC bit for now)
8267 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8268 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8269 vdPipeFlushParams.Flags.bFlushHEVC = 1;
8270 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8271 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8272
8273 // Flush the engine to ensure memory written out
8274 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8275 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8276 flushDwParams.bVideoPipelineCacheInvalidate = true;
8277 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8278
8279 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8280 {
8281 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8282 }
8283
8284 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8285
8286 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8287 {
8288 bool renderingFlags = m_videoContextUsesNullHw;
8289
8290 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8291 }
8292
8293 return eStatus;
8294 }
8295
HuCLookaheadUpdate()8296 MOS_STATUS CodechalVdencHevcStateG12::HuCLookaheadUpdate()
8297 {
8298 uint8_t currentPass = (uint8_t)GetCurrentPass();
8299 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8300
8301 CODECHAL_ENCODE_FUNCTION_ENTER;
8302
8303 m_firstTaskInPhase = !m_singleTaskPhaseSupported;
8304 m_lastTaskInPhase = (currentPass == m_numPasses);
8305
8306 // set DMEM
8307 MOS_LOCK_PARAMS lockFlagsWriteOnly;
8308 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
8309 lockFlagsWriteOnly.WriteOnly = true;
8310
8311 auto dmem = (PCodechalVdencHevcLaDmem)m_osInterface->pfnLockResource(
8312 m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
8313 CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
8314 MOS_ZeroMemory(dmem, sizeof(CodechalVdencHevcLaDmem));
8315
8316 dmem->lookAheadFunc = 1;
8317 dmem->validStatsRecords = m_numValidLaRecords;
8318 dmem->offset = (m_numLaDataEntry + m_currLaDataIdx + 1 - m_numValidLaRecords) % m_numLaDataEntry;
8319 dmem->cqmQpThreshold = m_cqmQpThreshold;
8320 dmem->currentPass = currentPass;
8321
8322 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
8323
8324 // set HuC regions
8325 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8326 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8327 virtualAddrParams.regionParams[0].presRegion = &m_vdencLaHistoryBuffer;
8328 virtualAddrParams.regionParams[0].isWritable = true;
8329 virtualAddrParams.regionParams[1].presRegion = &m_vdencLaStatsBuffer;
8330 virtualAddrParams.regionParams[2].presRegion = &m_vdencLaDataBuffer;
8331 virtualAddrParams.regionParams[2].isWritable = true;
8332
8333 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_VDENC_RESERVED
8334 if (m_swLaMode)
8335 {
8336 bool isLaAnalysisRequired = true;
8337 MOS_LOCK_PARAMS lockFlags;
8338 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8339 lockFlags.ReadOnly = true;
8340
8341 if (!IsFirstPass())
8342 {
8343 uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &m_resPakMmioBuffer, &lockFlags);
8344 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8345 isLaAnalysisRequired = (*data == CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK);
8346 m_osInterface->pfnUnlockResource(m_osInterface, &m_resPakMmioBuffer);
8347 }
8348
8349 if (isLaAnalysisRequired)
8350 {
8351 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallSwLookaheadImpl(
8352 m_debugInterface,
8353 m_swLaMode,
8354 CODECHAL_MEDIA_STATE_BRC_UPDATE,
8355 &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
8356 &m_resPakMmioBuffer,
8357 &virtualAddrParams));
8358
8359 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
8360 uint32_t baseOffset = (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize);
8361
8362 CodechalVdencHevcLaData *data = (CodechalVdencHevcLaData *)m_osInterface->pfnLockResource(m_osInterface, &m_vdencLaDataBuffer, &lockFlags);
8363 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8364
8365 LookaheadReport *lookaheadStatus = (LookaheadReport *)(encodeStatusBuf.pEncodeStatus + baseOffset + encodeStatusBuf.dwLookaheadStatusOffset);
8366 lookaheadStatus->targetFrameSize = data[dmem->offset].targetFrameSize;
8367 lookaheadStatus->targetBufferFulness = data[dmem->offset].targetBufferFulness;
8368 lookaheadStatus->encodeHints = data[dmem->offset].encodeHints;
8369 lookaheadStatus->pyramidDeltaQP = data[dmem->offset].pyramidDeltaQP;
8370
8371 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaDataBuffer);
8372 }
8373
8374 return eStatus;
8375 }
8376 #endif
8377
8378 MOS_COMMAND_BUFFER cmdBuffer;
8379 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8380
8381 if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
8382 {
8383 // Send command buffer header at the beginning (OS dependent)
8384 bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
8385 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8386 }
8387
8388 if (!IsFirstPass() && m_firstTaskInPhase)
8389 {
8390 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
8391 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
8392 MOS_ZeroMemory(
8393 &miConditionalBatchBufferEndParams,
8394 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
8395 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
8396 &m_resPakMmioBuffer;
8397 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
8398 &cmdBuffer,
8399 &miConditionalBatchBufferEndParams));
8400 }
8401
8402 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
8403
8404 // load kernel from WOPCM into L2 storage RAM
8405 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8406 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8407 imemParams.dwKernelDescriptor = m_vdboxHucHevcLaAnalysisKernelDescriptor;
8408
8409 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8410
8411 // pipe mode select
8412 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8413 pipeModeSelectParams.Mode = m_mode;
8414 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8415
8416 // set HuC DMEM param
8417 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8418 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8419 dmemParams.presHucDataSource = &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass];
8420 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8421 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8422 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8423 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
8424 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8425 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8426
8427 // wait Huc completion (use HEVC bit for now)
8428 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8429 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8430 vdPipeFlushParams.Flags.bFlushHEVC = 1;
8431 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8432 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8433
8434 // Flush the engine to ensure memory written out
8435 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8436 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8437 flushDwParams.bVideoPipelineCacheInvalidate = true;
8438 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8439
8440 if (IsFirstPass())
8441 {
8442 // Write HUC_STATUS mask: DW1 (mask value)
8443 MHW_MI_STORE_DATA_PARAMS storeDataParams;
8444 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8445 storeDataParams.pOsResource = &m_resPakMmioBuffer;
8446 storeDataParams.dwResourceOffset = sizeof(uint32_t);
8447 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
8448 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
8449
8450 // store HUC_STATUS register: DW0 (actual value)
8451 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8452 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8453 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
8454 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
8455 storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
8456 storeRegParams.dwOffset = 0;
8457 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
8458 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
8459 }
8460
8461 // Write lookahead status to encode status buffer
8462 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
8463 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
8464 uint32_t baseOffset =
8465 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
8466 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
8467 miCpyMemMemParams.presSrc = &m_vdencLaDataBuffer;
8468 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, encodeHints);
8469 miCpyMemMemParams.presDst = &encodeStatusBuf.resStatusBuffer;
8470 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, encodeHints);
8471 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8472 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetFrameSize);
8473 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetFrameSize);
8474 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8475 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, targetBufferFulness);
8476 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, targetBufferFulness);
8477 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8478 miCpyMemMemParams.dwSrcOffset = dmem->offset * sizeof(CodechalVdencHevcLaData) + CODECHAL_OFFSETOF(CodechalVdencHevcLaData, pyramidDeltaQP);
8479 miCpyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf.dwLookaheadStatusOffset + CODECHAL_OFFSETOF(LookaheadReport, pyramidDeltaQP);
8480 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams));
8481
8482 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8483 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8484
8485 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
8486
8487 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8488 {
8489 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8490 }
8491
8492 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8493
8494 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
8495 {
8496 bool renderingFlags = m_videoContextUsesNullHw;
8497 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8498 }
8499
8500 return eStatus;
8501 }
8502
AnalyzeLookaheadStats()8503 MOS_STATUS CodechalVdencHevcStateG12::AnalyzeLookaheadStats()
8504 {
8505 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8506
8507 CODECHAL_ENCODE_FUNCTION_ENTER;
8508
8509 if(IsFirstPass())
8510 {
8511 m_numValidLaRecords++;
8512 }
8513
8514 if (m_lookaheadInit)
8515 {
8516 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadInit());
8517 m_lookaheadInit = false;
8518 }
8519
8520 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
8521 if (IsLastPass() && (m_numValidLaRecords >= m_lookaheadDepth))
8522 {
8523 m_numValidLaRecords--;
8524 m_lookaheadReport = true;
8525 }
8526
8527 int32_t currentPass = GetCurrentPass();
8528 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8529 &m_vdencLaUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
8530 CodechalDbgAttr::attrVdencOutput,
8531 "_LookaheadDmem",
8532 sizeof(CodechalVdencHevcLaDmem),
8533 0,
8534 CODECHAL_NUM_MEDIA_STATES)));
8535
8536 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8537 &m_vdencLaDataBuffer,
8538 CodechalDbgAttr::attrVdencOutput,
8539 "_LookaheadData",
8540 m_brcLooaheadDataBufferSize,
8541 0,
8542 CODECHAL_NUM_MEDIA_STATES)));
8543
8544 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
8545 &m_vdencLaHistoryBuffer,
8546 CodechalDbgAttr::attrVdencOutput,
8547 "_LookaheadHistory",
8548 m_LaHistoryBufSize,
8549 0,
8550 CODECHAL_NUM_MEDIA_STATES)));
8551
8552 if (m_hevcPicParams->bLastPicInStream)
8553 {
8554 // Flush the last frames
8555 while (m_numValidLaRecords > 0)
8556 {
8557 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCLookaheadUpdate());
8558 m_numValidLaRecords--;
8559 }
8560 }
8561
8562 return eStatus;
8563 }
8564
HuCBrcInitReset()8565 MOS_STATUS CodechalVdencHevcStateG12::HuCBrcInitReset()
8566 {
8567 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8568
8569 CODECHAL_ENCODE_FUNCTION_ENTER;
8570
8571 #if (_DEBUG || _RELEASE_INTERNAL) && _ENCODE_VDENC_RESERVED
8572 if (m_swBrcMode != nullptr && !m_enableTileReplay && !m_hevcVdencWeightedPredEnabled)
8573 {
8574 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
8575
8576 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8577 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8578 virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
8579 virtualAddrParams.regionParams[0].isWritable = true;
8580
8581 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcVdencSwBrcImpl(
8582 m_debugInterface,
8583 m_swBrcMode,
8584 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
8585 !m_brcInit,
8586 &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx],
8587 &m_resPakMmioBuffer,
8588 &virtualAddrParams));
8589
8590 CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
8591
8592 return eStatus;
8593 }
8594 #endif
8595
8596 MOS_COMMAND_BUFFER cmdBuffer;
8597 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8598
8599 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && (m_numPipe == 1))
8600 {
8601 // Send command buffer header at the beginning (OS dependent)
8602 bool requestFrameTracking = m_singleTaskPhaseSupported ?
8603 m_firstTaskInPhase : 0;
8604 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8605 }
8606
8607 // load kernel from WOPCM into L2 storage RAM
8608 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8609 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8610 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
8611
8612 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8613
8614 // pipe mode select
8615 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8616 pipeModeSelectParams.Mode = m_mode;
8617 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8618
8619 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
8620
8621 // set HuC DMEM param
8622 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8623 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8624 dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
8625 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8626 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8627 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8628
8629 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8630 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
8631 virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
8632 virtualAddrParams.regionParams[0].isWritable = true;
8633 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
8634
8635 // Store HUC_STATUS2 register bit 6 before HUC_Start command
8636 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
8637 // (HUC_Start command with last start bit set).
8638 CODECHAL_DEBUG_TOOL(
8639 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
8640 )
8641
8642 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8643
8644 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8645
8646 // wait Huc completion (use HEVC bit for now)
8647 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8648 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8649 vdPipeFlushParams.Flags.bFlushHEVC = 1;
8650 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8651 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8652
8653 // Flush the engine to ensure memory written out
8654 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8655 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8656 flushDwParams.bVideoPipelineCacheInvalidate = true;
8657 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8658
8659 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8660 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8661 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, true));
8662 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
8663
8664 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
8665 {
8666 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8667 }
8668
8669 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8670
8671 if (!m_singleTaskPhaseSupported)
8672 {
8673 bool renderingFlags = m_videoContextUsesNullHw;
8674
8675 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
8676 &cmdBuffer,
8677 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
8678 nullptr)));
8679 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8680 }
8681
8682 CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
8683 return eStatus;
8684 }
8685
HuCBrcUpdate()8686 MOS_STATUS CodechalVdencHevcStateG12::HuCBrcUpdate()
8687 {
8688 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8689
8690 CODECHAL_ENCODE_FUNCTION_ENTER;
8691
8692 *m_pipeBufAddrParams = {};
8693 if (m_pictureCodingType != I_TYPE)
8694 {
8695 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
8696 {
8697 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
8698 {
8699 continue;
8700 }
8701
8702 uint8_t idx = m_picIdx[i].ucPicIdx;
8703 CodecHalGetResourceInfo(m_osInterface, &(m_refList[idx]->sRefReconBuffer));
8704
8705 uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i];
8706 m_pipeBufAddrParams->presReferences[frameStoreId] = &(m_refList[idx]->sRefReconBuffer.OsResource);
8707 }
8708 }
8709
8710 if (m_enableSCC && m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
8711 {
8712 // I frame is much simpler
8713 if (m_pictureCodingType == I_TYPE)
8714 {
8715 m_slotForRecNotFiltered = 0;
8716 }
8717 // LDB
8718 else
8719 {
8720 unsigned int i;
8721
8722 // Find one available slot
8723 for (i = 0; i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC; i++)
8724 {
8725 if (m_pipeBufAddrParams->presReferences[i] == nullptr)
8726 {
8727 break;
8728 }
8729 }
8730
8731 CODECHAL_ENCODE_ASSERT(i < CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC);
8732
8733 //record the slot for HCP_REF_IDX_STATE
8734 m_slotForRecNotFiltered = (unsigned char)i;
8735 }
8736 }
8737
8738 int32_t currentPass = GetCurrentPass();
8739 if (currentPass < 0)
8740 {
8741 eStatus = MOS_STATUS_INVALID_PARAMETER;
8742 return eStatus;
8743 }
8744
8745 CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
8746
8747 #if (_DEBUG || _RELEASE_INTERNAL) && _ENCODE_VDENC_RESERVED
8748 if (m_swBrcMode != nullptr && !m_enableTileReplay && !m_hevcVdencWeightedPredEnabled)
8749 {
8750 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
8751 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
8752 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
8753
8754 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
8755
8756 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcVdencSwBrcImpl(
8757 m_debugInterface,
8758 m_swBrcMode,
8759 CODECHAL_MEDIA_STATE_BRC_UPDATE,
8760 (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW),
8761 &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_currPass],
8762 &m_resPakMmioBuffer,
8763 &m_virtualAddrParams));
8764
8765 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
8766
8767 return eStatus;
8768 }
8769 #endif
8770
8771 MOS_COMMAND_BUFFER cmdBuffer;
8772 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
8773
8774 if (((!m_singleTaskPhaseSupported) || ((m_firstTaskInPhase) && (!m_brcInit))) && (m_numPipe == 1))
8775 {
8776 // Send command buffer header at the beginning (OS dependent)
8777 bool requestFrameTracking = m_singleTaskPhaseSupported ?
8778 m_firstTaskInPhase : 0;
8779 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
8780 }
8781
8782 // load kernel from WOPCM into L2 storage RAM
8783 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8784 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8785
8786 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC
8787 {
8788 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
8789 }
8790 else
8791 {
8792 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
8793 }
8794
8795 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
8796
8797 // pipe mode select
8798 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8799 pipeModeSelectParams.Mode = m_mode;
8800 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
8801
8802 // DMEM set
8803 m_CurrentPassForOverAll = 0;
8804 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
8805
8806 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8807 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8808 dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
8809 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8810 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8811
8812 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
8813
8814 // Set Const Data buffer
8815 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
8816
8817 // Add Virtual addr
8818 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
8819 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));
8820
8821 // Store HUC_STATUS2 register bit 6 before HUC_Start command
8822 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
8823 // (HUC_Start command with last start bit set).
8824 CODECHAL_DEBUG_TOOL(
8825 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
8826 )
8827
8828 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
8829
8830 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
8831
8832 // wait Huc completion (use HEVC bit for now)
8833 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8834 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8835 vdPipeFlushParams.Flags.bFlushHEVC = 1;
8836 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8837 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
8838
8839 // Flush the engine to ensure memory written out
8840 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8841 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8842 flushDwParams.bVideoPipelineCacheInvalidate = true;
8843 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
8844
8845 // Write HUC_STATUS mask: DW1 (mask value)
8846 MHW_MI_STORE_DATA_PARAMS storeDataParams;
8847 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8848 storeDataParams.pOsResource = &m_resPakMmioBuffer;
8849 storeDataParams.dwResourceOffset = sizeof(uint32_t);
8850 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
8851 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
8852
8853 // store HUC_STATUS register: DW0 (actual value)
8854 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8855 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8856 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
8857 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
8858 storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
8859 storeRegParams.dwOffset = 0;
8860 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
8861 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
8862
8863 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, true));
8864 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
8865
8866 // DW0 & DW1 will considered together for conditional batch buffer end cmd later
8867 if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd) && (m_numPipe == 1))
8868 {
8869 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
8870 }
8871
8872 // HuC Input
8873 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
8874
8875 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
8876
8877 if (!m_singleTaskPhaseSupported)
8878 {
8879 bool renderingFlags = m_videoContextUsesNullHw;
8880
8881 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
8882 &cmdBuffer,
8883 CODECHAL_MEDIA_STATE_BRC_UPDATE,
8884 nullptr)));
8885 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
8886 }
8887
8888 // HuC Output
8889 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
8890
8891 return eStatus;
8892 }
8893
HuCBrcTileRowUpdate(PMOS_COMMAND_BUFFER cmdBuffer)8894 MOS_STATUS CodechalVdencHevcStateG12::HuCBrcTileRowUpdate(PMOS_COMMAND_BUFFER cmdBuffer)
8895 {
8896 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8897
8898 CODECHAL_ENCODE_FUNCTION_ENTER;
8899
8900 MOS_LOCK_PARAMS lockFlags;
8901 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
8902 lockFlags.WriteOnly = true;
8903
8904 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource), &lockFlags);
8905 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
8906
8907 MOS_COMMAND_BUFFER tileRowBRCBatchBuf;
8908 MOS_ZeroMemory(&tileRowBRCBatchBuf, sizeof(tileRowBRCBatchBuf));
8909 tileRowBRCBatchBuf.pCmdBase = tileRowBRCBatchBuf.pCmdPtr = (uint32_t *)data;
8910 tileRowBRCBatchBuf.iRemaining = m_hwInterface->m_hucCommandBufferSize;
8911
8912 // Add batch buffer start for tile row BRC batch
8913 HalOcaInterface::OnSubLevelBBStart(*cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource, 0, true, 0);
8914 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(cmdBuffer, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow]));
8915
8916 // load kernel from WOPCM into L2 storage RAM
8917 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
8918 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
8919
8920 if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC
8921 {
8922 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
8923 }
8924 else
8925 {
8926 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
8927 }
8928
8929 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&tileRowBRCBatchBuf, &imemParams));
8930
8931 // pipe mode select
8932 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
8933 pipeModeSelectParams.Mode = m_mode;
8934 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&tileRowBRCBatchBuf, &pipeModeSelectParams));
8935
8936 // DMEM set
8937 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
8938
8939 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
8940 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
8941 dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][m_CurrentPassForOverAll]);
8942 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
8943 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
8944
8945 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&tileRowBRCBatchBuf, &dmemParams));
8946
8947 // Set Const Data buffer
8948 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
8949
8950 // Add Virtual addr
8951 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
8952 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCTileRowBrcUpdate(&virtualAddrParams));
8953 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&tileRowBRCBatchBuf, &virtualAddrParams));
8954
8955 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(cmdBuffer));
8956
8957 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&tileRowBRCBatchBuf, true));
8958
8959 // wait Huc completion (use HEVC bit for now)
8960 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
8961 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
8962 vdPipeFlushParams.Flags.bFlushHEVC = 1;
8963 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
8964 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&tileRowBRCBatchBuf, &vdPipeFlushParams));
8965
8966 // Flush the engine to ensure memory written out
8967 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
8968 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
8969 flushDwParams.bVideoPipelineCacheInvalidate = true;
8970 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&tileRowBRCBatchBuf, &flushDwParams));
8971
8972 // Write HUC_STATUS mask: DW1 (mask value)
8973 MHW_MI_STORE_DATA_PARAMS storeDataParams;
8974 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8975 storeDataParams.pOsResource = &m_resPakMmioBuffer;
8976 storeDataParams.dwResourceOffset = sizeof(uint32_t);
8977 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
8978 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&tileRowBRCBatchBuf, &storeDataParams));
8979
8980 // store HUC_STATUS register: DW0 (actual value)
8981 CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
8982 auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
8983 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
8984 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
8985 storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
8986 storeRegParams.dwOffset = 0;
8987 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
8988 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&tileRowBRCBatchBuf, &storeRegParams));
8989
8990 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &tileRowBRCBatchBuf, true));
8991 CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&tileRowBRCBatchBuf));
8992
8993 // Set the tile row BRC update sync semaphore
8994 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
8995 storeDataParams.pOsResource = &m_resTileRowBRCsyncSemaphore;
8996 storeDataParams.dwValue = 0xFF;
8997 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&tileRowBRCBatchBuf, &storeDataParams));
8998
8999 (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->iCurrent = tileRowBRCBatchBuf.iOffset;
9000 (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->iRemaining = tileRowBRCBatchBuf.iRemaining;
9001 (&m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow])->pData = data;
9002 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(nullptr, &m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow]));
9003
9004 if (data)
9005 {
9006 m_osInterface->pfnUnlockResource(m_osInterface, &(m_TileRowBRCBatchBuffer[m_CurrentPassForTileReplay][m_CurrentTileRow].OsResource));
9007 }
9008
9009 return eStatus;
9010 }
9011
UpdateCmdBufAttribute(PMOS_COMMAND_BUFFER cmdBuffer,bool renderEngineInUse)9012 MOS_STATUS CodechalVdencHevcStateG12::UpdateCmdBufAttribute(
9013 PMOS_COMMAND_BUFFER cmdBuffer,
9014 bool renderEngineInUse)
9015 {
9016 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9017
9018 // should not be there. Will remove it in the next change
9019 CODECHAL_ENCODE_FUNCTION_ENTER;
9020 if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
9021 {
9022 PMOS_CMD_BUF_ATTRI_VE attriExt =
9023 (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
9024
9025 memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
9026 attriExt->bUseVirtualEngineHint =
9027 attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
9028 }
9029
9030 return eStatus;
9031 }
9032
AddMediaVfeCmd(PMOS_COMMAND_BUFFER cmdBuffer,SendKernelCmdsParams * params)9033 MOS_STATUS CodechalVdencHevcStateG12::AddMediaVfeCmd(
9034 PMOS_COMMAND_BUFFER cmdBuffer,
9035 SendKernelCmdsParams *params)
9036 {
9037 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
9038
9039 MHW_VFE_PARAMS_G12 vfeParams = {};
9040 vfeParams.pKernelState = params->pKernelState;
9041 vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
9042 vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads;
9043 vfeParams.bFusedEuDispatch = false; // legacy mode
9044
9045 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
9046
9047 return MOS_STATUS_SUCCESS;
9048 }
9049
SetStreaminDataPerLcu(PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)9050 void CodechalVdencHevcStateG12::SetStreaminDataPerLcu(
9051 PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
9052 void* streaminData)
9053 {
9054 CODECHAL_ENCODE_FUNCTION_ENTER;
9055 PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G12 data = (PCODECHAL_VDENC_HEVC_STREAMIN_STATE_G12)streaminData;
9056 if (streaminParams->setQpRoiCtrl)
9057 {
9058 if (m_vdencNativeROIEnabled || m_brcAdaptiveRegionBoostEnable)
9059 {
9060 data->DW0.RoiCtrl = streaminParams->roiCtrl;
9061 }
9062 else
9063 {
9064 data->DW7.QpEnable = 0xf;
9065 data->DW14.ForceQp_0 = streaminParams->forceQp[0];
9066 data->DW14.ForceQp_1 = streaminParams->forceQp[1];
9067 data->DW14.ForceQp_2 = streaminParams->forceQp[2];
9068 data->DW14.ForceQp_3 = streaminParams->forceQp[3];
9069 }
9070 }
9071 else
9072 {
9073 data->DW0.MaxTuSize = streaminParams->maxTuSize;
9074 data->DW0.MaxCuSize = streaminParams->maxCuSize;
9075 data->DW0.NumImePredictors = streaminParams->numImePredictors;
9076 data->DW0.PuTypeCtrl = streaminParams->puTypeCtrl;
9077 data->DW6.NumMergeCandidateCu64x64 = streaminParams->numMergeCandidateCu64x64;
9078 data->DW6.NumMergeCandidateCu32x32 = streaminParams->numMergeCandidateCu32x32;
9079 data->DW6.NumMergeCandidateCu16x16 = streaminParams->numMergeCandidateCu16x16;
9080 data->DW6.NumMergeCandidateCu8x8 = streaminParams->numMergeCandidateCu8x8;
9081 }
9082 }
9083
GetTileInfo(uint32_t xPosition,uint32_t yPosition,uint32_t * tileId,uint32_t * tileEndLCUX,uint32_t * tileEndLCUY)9084 void CodechalVdencHevcStateG12::GetTileInfo(
9085 uint32_t xPosition,
9086 uint32_t yPosition,
9087 uint32_t* tileId,
9088 uint32_t* tileEndLCUX,
9089 uint32_t* tileEndLCUY)
9090 {
9091 *tileId = 0;
9092 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
9093 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9094
9095 for (uint8_t i = 0; i < m_numTiles; i++)
9096 {
9097 uint32_t tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[i].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9098 uint32_t tileHeightInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[i].TileHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9099 *tileEndLCUX = tileParams[i].TileStartLCUX + tileWidthInLCU;
9100 *tileEndLCUY = tileParams[i].TileStartLCUY + tileHeightInLCU;
9101
9102 if (xPosition >= (tileParams[i].TileStartLCUX * 2) &&
9103 yPosition >= (tileParams[i].TileStartLCUY * 2) &&
9104 xPosition < (*tileEndLCUX * 2) &&
9105 yPosition < (*tileEndLCUY * 2))
9106 {
9107 *tileId = i;
9108 break;
9109 }
9110 }
9111 }
9112
PrepareVDEncStreamInData()9113 MOS_STATUS CodechalVdencHevcStateG12::PrepareVDEncStreamInData()
9114 {
9115 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9116
9117 CODECHAL_ENCODE_FUNCTION_ENTER;
9118
9119 if (m_lookaheadPass && m_firstFrame)
9120 {
9121 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupForceIntraStreamIn(&m_resVdencStreamInBuffer[0]));
9122 }
9123
9124 if (m_hevcPicParams->tiles_enabled_flag)
9125 {
9126 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams[m_virtualEngineBbIndex]));
9127 }
9128 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::PrepareVDEncStreamInData());
9129
9130 return eStatus;
9131 }
9132
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)9133 void CodechalVdencHevcStateG12::SetStreaminDataPerRegion(
9134 uint32_t streamInWidth,
9135 uint32_t top,
9136 uint32_t bottom,
9137 uint32_t left,
9138 uint32_t right,
9139 PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
9140 void* streaminData)
9141 {
9142 CODECHAL_ENCODE_FUNCTION_ENTER;
9143
9144 if (!m_hevcPicParams->tiles_enabled_flag)
9145 {
9146 CodechalVdencHevcState::SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, streaminParams, streaminData);
9147 return;
9148 }
9149
9150 uint8_t* data = (uint8_t*)streaminData;
9151 uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
9152 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
9153 GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
9154
9155 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9156
9157 for (auto y = top; y < bottom; y++)
9158 {
9159 for (auto x = left; x < right; x++)
9160 {
9161 uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
9162
9163 if (x < (tileParams[tileId].TileStartLCUX * 2) ||
9164 y < (tileParams[tileId].TileStartLCUY * 2) ||
9165 x >= (tileEndLCUX * 2) ||
9166 y >= (tileEndLCUY * 2))
9167 {
9168 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
9169 }
9170 streamInBaseOffset = tileParams[tileId].TileStreaminOffset;
9171
9172 auto xPositionInTile = x - (tileParams[tileId].TileStartLCUX * 2);
9173 auto yPositionInTile = y - (tileParams[tileId].TileStartLCUY * 2);
9174 auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9175
9176 StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
9177
9178 SetStreaminDataPerLcu(streaminParams, data + (streamInBaseOffset + offset + xyOffset) * 64);
9179 }
9180 }
9181 }
9182
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)9183 void CodechalVdencHevcStateG12::SetBrcRoiDeltaQpMap(
9184 uint32_t streamInWidth,
9185 uint32_t top,
9186 uint32_t bottom,
9187 uint32_t left,
9188 uint32_t right,
9189 uint8_t regionId,
9190 PDeltaQpForROI deltaQpMap)
9191 {
9192
9193 CODECHAL_ENCODE_FUNCTION_ENTER;
9194
9195 if (!m_hevcPicParams->tiles_enabled_flag)
9196 {
9197 CodechalVdencHevcState::SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, regionId, deltaQpMap);
9198 return;
9199 }
9200
9201 uint32_t tileId = 0, tileEndLCUX = 0, tileEndLCUY = 0;
9202 uint32_t ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
9203 GetTileInfo(left, top, &tileId, &tileEndLCUX, &tileEndLCUY);
9204
9205 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9206
9207 for (auto y = top; y < bottom; y++)
9208 {
9209 for (auto x = left; x < right; x++)
9210 {
9211 uint32_t streamInBaseOffset = 0, offset = 0, xyOffset = 0;
9212
9213 if (x < (tileParams[tileId].TileStartLCUX * 2) ||
9214 y < (tileParams[tileId].TileStartLCUY * 2) ||
9215 x >= (tileEndLCUX * 2) ||
9216 y >= (tileEndLCUY * 2))
9217 {
9218 GetTileInfo(x, y, &tileId, &tileEndLCUX, &tileEndLCUY);
9219 }
9220 streamInBaseOffset = tileParams[tileId].TileStreaminOffset;
9221
9222 auto xPositionInTile = x - (tileParams[tileId].TileStartLCUX * 2);
9223 auto yPositionInTile = y - (tileParams[tileId].TileStartLCUY * 2);
9224 auto tileWidthInLCU = MOS_ROUNDUP_DIVIDE(((tileParams[tileId].TileWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3)), ctbSize);
9225
9226 StreaminZigZagToLinearMap(tileWidthInLCU * 2, xPositionInTile, yPositionInTile, &offset, &xyOffset);
9227
9228 (deltaQpMap + (streamInBaseOffset + offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
9229 }
9230 }
9231 }
9232
SetAndPopulateVEHintParams(PMOS_COMMAND_BUFFER cmdBuffer)9233 MOS_STATUS CodechalVdencHevcStateG12::SetAndPopulateVEHintParams(
9234 PMOS_COMMAND_BUFFER cmdBuffer)
9235 {
9236 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9237
9238 CODECHAL_ENCODE_FUNCTION_ENTER;
9239
9240 if (!MOS_VE_SUPPORTED(m_osInterface))
9241 {
9242 return eStatus;
9243 }
9244
9245 CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
9246 MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
9247
9248 if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
9249 {
9250 scalSetParms.bNeedSyncWithPrevious = true;
9251 }
9252
9253 int32_t currentPass = GetCurrentPass();
9254 if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES)
9255 {
9256 eStatus = MOS_STATUS_INVALID_PARAMETER;
9257 return eStatus;
9258 }
9259 uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
9260 if (m_numPipe >= 2)
9261 {
9262 for (auto i = 0; i < m_numPipe; i++)
9263 {
9264 scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource;
9265 }
9266 }
9267
9268 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
9269 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
9270 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
9271
9272 return eStatus;
9273 }
9274
9275 #if USE_CODECHAL_DEBUG_TOOL
DumpVdencOutputs()9276 MOS_STATUS CodechalVdencHevcStateG12::DumpVdencOutputs()
9277 {
9278 CODECHAL_ENCODE_FUNCTION_ENTER;
9279
9280 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::DumpVdencOutputs());
9281
9282 if (m_hevcPicParams->tiles_enabled_flag)
9283 {
9284 PMOS_RESOURCE presVdencTileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
9285 auto num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1);
9286 auto vdencStatsSizeAllTiles = num_tiles * m_vdencBrcStatsBufferSize;
9287 auto vdencStatsOffset = m_hevcTileStatsOffset.uiVdencStatistics;
9288 auto pakStatsSizeAllTiles = num_tiles * 9 * CODECHAL_CACHELINE_SIZE;
9289 auto pakStatsOffset = m_hevcTileStatsOffset.uiHevcPakStatistics;
9290
9291 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9292 presVdencTileStatisticsBuffer,
9293 CodechalDbgAttr::attrVdencOutput,
9294 "_TileVDEncStats",
9295 vdencStatsSizeAllTiles,
9296 vdencStatsOffset,
9297 CODECHAL_NUM_MEDIA_STATES));
9298
9299 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9300 presVdencTileStatisticsBuffer,
9301 CodechalDbgAttr::attrPakOutput,
9302 "_TilePAKStats",
9303 pakStatsSizeAllTiles,
9304 pakStatsOffset,
9305 CODECHAL_NUM_MEDIA_STATES));
9306
9307 // Slice Size Conformance
9308 if (m_hevcSeqParams->SliceSizeControl)
9309 {
9310 PMOS_RESOURCE presLcuBaseAddressBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource;
9311 auto sliceStreamoutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout;
9312 uint32_t size = m_numLcu * CODECHAL_CACHELINE_SIZE;
9313 // Slice Size StreamOut Surface
9314 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
9315 presLcuBaseAddressBuffer,
9316 CodechalDbgAttr::attrVdencOutput,
9317 "_SliceSize",
9318 size,
9319 sliceStreamoutOffset,
9320 CODECHAL_NUM_MEDIA_STATES));
9321 }
9322 }
9323
9324 return MOS_STATUS_SUCCESS;
9325 }
9326
DumpHucDebugOutputBuffers()9327 MOS_STATUS CodechalVdencHevcStateG12::DumpHucDebugOutputBuffers()
9328 {
9329 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9330
9331 // Virtual Engine does only one submit per pass. Dump all HuC debug outputs
9332 bool dumpDebugBuffers = IsLastPipe() && (m_numPipe > 1);
9333 if (m_singleTaskPhaseSupported)
9334 {
9335 dumpDebugBuffers = dumpDebugBuffers && IsLastPass();
9336 }
9337
9338 if (dumpDebugBuffers)
9339 {
9340 CODECHAL_DEBUG_TOOL(
9341 DumpHucPakIntegrate();
9342 DumpHucCqp();
9343 )
9344 }
9345
9346 return eStatus;
9347 }
9348
DumpHucPakIntegrate()9349 MOS_STATUS CodechalVdencHevcStateG12::DumpHucPakIntegrate()
9350 {
9351 int32_t currentPass = GetCurrentPass();
9352 // HuC Input
9353 // HuC DMEM
9354 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
9355 &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass],
9356 MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemVdencG12), CODECHAL_CACHELINE_SIZE),
9357 currentPass,
9358 hucRegionDumpPakIntegrate));
9359
9360 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9361 &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource,
9362 0,
9363 m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize,
9364 0,
9365 "",
9366 true,
9367 currentPass,
9368 hucRegionDumpPakIntegrate));
9369
9370 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9371 &m_resHuCPakAggregatedFrameStatsBuffer.sResource,
9372 0,
9373 m_resHuCPakAggregatedFrameStatsBuffer.dwSize,
9374 1,
9375 "",
9376 false,
9377 currentPass,
9378 hucRegionDumpPakIntegrate));
9379
9380 PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileParams = m_tileParams[m_virtualEngineBbIndex];
9381 CODECHAL_ENCODE_CHK_NULL_RETURN(tileParams);
9382
9383 auto bitStreamSize = m_encodeParams.dwBitstreamSize -
9384 MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
9385
9386 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9387 &m_resBitstreamBuffer,
9388 MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE),
9389 bitStreamSize,
9390 4,
9391 "",
9392 true,
9393 currentPass,
9394 hucRegionDumpPakIntegrate));
9395
9396 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9397 &m_resBitstreamBuffer,
9398 MOS_ALIGN_FLOOR(tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE),
9399 bitStreamSize,
9400 5,
9401 "",
9402 false,
9403 currentPass,
9404 hucRegionDumpPakIntegrate));
9405
9406 // Region 6 - BRC History buffer
9407 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9408 &m_vdencBrcHistoryBuffer,
9409 0,
9410 CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
9411 6,
9412 "",
9413 false,
9414 currentPass,
9415 hucRegionDumpPakIntegrate));
9416
9417 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9418 &m_thirdLevelBatchBuffer.OsResource,
9419 0,
9420 m_thirdLBSize,
9421 7,
9422 "",
9423 true,
9424 currentPass,
9425 hucRegionDumpPakIntegrate));
9426
9427 // Region 8
9428 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9429 &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass],
9430 0,
9431 MOS_ALIGN_CEIL(sizeof(HucCommandDataVdencG12), CODECHAL_PAGE_SIZE),
9432 8,
9433 "",
9434 true,
9435 currentPass,
9436 hucRegionDumpPakIntegrate));
9437
9438 // Region 9 - HCP BRC Data Output
9439 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9440 &m_resBrcDataBuffer,
9441 0,
9442 CODECHAL_CACHELINE_SIZE,
9443 9,
9444 "",
9445 false,
9446 currentPass,
9447 hucRegionDumpPakIntegrate));
9448
9449 // Region 10
9450 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9451 &m_HucStitchCmdBatchBuffer.OsResource,
9452 0,
9453 m_hwInterface->m_HucStitchCmdBatchBufferSize,
9454 10,
9455 "",
9456 false,
9457 currentPass,
9458 hucRegionDumpPakIntegrate));
9459
9460 CODECHAL_DEBUG_CHK_STATUS(m_debugInterface->DumpHucRegion(
9461 &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource,
9462 0,
9463 m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize,
9464 15,
9465 "",
9466 true,
9467 currentPass,
9468 hucRegionDumpPakIntegrate));
9469
9470 return MOS_STATUS_SUCCESS;
9471 }
9472
DumpHucCqp()9473 MOS_STATUS CodechalVdencHevcStateG12::DumpHucCqp()
9474 {
9475 CODECHAL_ENCODE_FUNCTION_ENTER;
9476 int32_t currentPass = GetCurrentPass();
9477
9478 // Region 5 - Output SLB Buffer
9479 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
9480 &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource,
9481 0,
9482 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
9483 5,
9484 "_Out_Slb",
9485 false,
9486 currentPass,
9487 hucRegionDumpUpdate));
9488
9489 return MOS_STATUS_SUCCESS;
9490 }
9491 #endif
9492
SetRoundingValues()9493 MOS_STATUS CodechalVdencHevcStateG12::SetRoundingValues()
9494 {
9495 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9496
9497 CODECHAL_ENCODE_FUNCTION_ENTER;
9498
9499 if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingIntra)
9500 {
9501 m_roundIntraValue = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetIntra;
9502 }
9503 else
9504 {
9505 if (m_hevcPicParams->CodingType == I_TYPE)
9506 {
9507 m_roundIntraValue = 10;
9508 }
9509 else if (m_hevcSeqParams->HierarchicalFlag && m_hevcPicParams->HierarchLevelPlus1 > 0)
9510 {
9511 if (m_hevcPicParams->HierarchLevelPlus1 == 1)
9512 {
9513 m_roundIntraValue = 10;
9514 }
9515 else if (m_hevcPicParams->HierarchLevelPlus1 == 2)
9516 {
9517 m_roundIntraValue = 9;
9518 }
9519 else
9520 {
9521 m_roundIntraValue = 8;
9522 }
9523 }
9524 else
9525 {
9526 m_roundIntraValue = 10;
9527 }
9528 }
9529
9530 if (m_hevcPicParams->CustomRoundingOffsetsParams.fields.EnableCustomRoudingInter)
9531 {
9532 m_roundInterValue = m_hevcPicParams->CustomRoundingOffsetsParams.fields.RoundingOffsetInter;
9533 }
9534 else
9535 {
9536 if (m_hevcPicParams->CodingType == I_TYPE)
9537 {
9538 m_roundInterValue = 4;
9539 }
9540 else if (m_hevcSeqParams->HierarchicalFlag && m_hevcPicParams->HierarchLevelPlus1 > 0)
9541 {
9542 if (m_hevcPicParams->HierarchLevelPlus1 == 1)
9543 {
9544 m_roundInterValue = 4;
9545 }
9546 else if (m_hevcPicParams->HierarchLevelPlus1 == 2)
9547 {
9548 m_roundInterValue = 3;
9549 }
9550 else
9551 {
9552 m_roundInterValue = 2;
9553 }
9554 }
9555 else
9556 {
9557 m_roundInterValue = 4;
9558 }
9559 }
9560
9561 return eStatus;
9562 }
9563
SetAddCommands(uint32_t commandType,PMOS_COMMAND_BUFFER cmdBuffer,bool addToBatchBufferHuCBRC,uint32_t roundInterValue,uint32_t roundIntraValue,bool isLowDelayB,int8_t * pRefIdxMapping,int8_t recNotFilteredID)9564 MOS_STATUS CodechalVdencHevcStateG12::SetAddCommands(uint32_t commandType, PMOS_COMMAND_BUFFER cmdBuffer, bool addToBatchBufferHuCBRC, uint32_t roundInterValue, uint32_t roundIntraValue, bool isLowDelayB, int8_t * pRefIdxMapping, int8_t recNotFilteredID)
9565 {
9566 #ifdef _HEVC_ENCODE_VDENC_SUPPORTED
9567 void *pCmdParams = nullptr;
9568
9569 if (commandType == CODECHAL_CMD1)
9570 {
9571 // Send CMD1 command
9572 MHW_VDBOX_VDENC_CMD1_PARAMS cmd1Params;
9573 MOS_ZeroMemory(&cmd1Params, sizeof(cmd1Params));
9574 cmd1Params.Mode = CODECHAL_ENCODE_MODE_HEVC;
9575 cmd1Params.pHevcEncPicParams = m_hevcPicParams;
9576 cmd1Params.pHevcEncSlcParams = m_hevcSliceParams;
9577 cmd1Params.pInputParams = pCmdParams;
9578 cmd1Params.bHevcVisualQualityImprovement = m_hevcVisualQualityImprovement;
9579 //down cast?
9580 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd1Cmd(cmdBuffer, nullptr, &cmd1Params));
9581 }
9582 else if (commandType == CODECHAL_CMD2)
9583 {
9584 PMHW_VDBOX_VDENC_CMD2_STATE cmd2Params(new MHW_VDBOX_VDENC_CMD2_STATE);
9585 CODECHAL_ENCODE_CHK_NULL_RETURN(cmd2Params);
9586
9587 // set CMD2 command
9588 cmd2Params->Mode = CODECHAL_ENCODE_MODE_HEVC;
9589 cmd2Params->pHevcEncSeqParams = m_hevcSeqParams;
9590 cmd2Params->pHevcEncPicParams = m_hevcPicParams;
9591 cmd2Params->pHevcEncSlcParams = m_hevcSliceParams;
9592 cmd2Params->bRoundingEnabled = m_hevcVdencRoundingEnabled;
9593 cmd2Params->bPakOnlyMultipassEnable = m_pakOnlyPass;
9594 cmd2Params->bUseDefaultQpDeltas = (m_hevcVdencAcqpEnabled && cmd2Params->pHevcEncSeqParams->QpAdjustment) ||
9595 (m_brcEnabled && cmd2Params->pHevcEncSeqParams->MBBRC != mbBrcDisabled);
9596 cmd2Params->bPanicEnabled = (m_brcEnabled) && (m_panicEnable) && (IsLastPass()) && !m_pakOnlyPass;
9597 cmd2Params->bStreamInEnabled = m_vdencStreamInEnabled;
9598 cmd2Params->bROIStreamInEnabled = m_vdencNativeROIEnabled;
9599 cmd2Params->bTileReplayEnable = m_enableTileReplay;
9600 cmd2Params->bIsLowDelayB = isLowDelayB;
9601 cmd2Params->bCaptureModeEnable = m_CaptureModeEnable;
9602 cmd2Params->m_WirelessSessionID = 0;
9603 cmd2Params->pRefIdxMapping = pRefIdxMapping;
9604 cmd2Params->recNotFilteredID = recNotFilteredID;
9605 cmd2Params->pInputParams = pCmdParams;
9606 cmd2Params->ucNumRefIdxL0ActiveMinus1 = cmd2Params->pHevcEncSlcParams->num_ref_idx_l0_active_minus1;
9607 cmd2Params->bHevcVisualQualityImprovement = m_hevcVisualQualityImprovement;
9608 cmd2Params->roundInterValue = roundInterValue;
9609 cmd2Params->roundIntraValue = roundIntraValue;
9610 cmd2Params->bROIStreamInEnabled = m_brcAdaptiveRegionBoostEnable ? true : cmd2Params->bROIStreamInEnabled;
9611 cmd2Params->bEnableSubPelMode = m_encodeParams.bEnableSubPelMode;
9612 cmd2Params->SubPelMode = m_encodeParams.SubPelMode;
9613
9614 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd2Cmd(cmdBuffer, nullptr, cmd2Params));
9615 }
9616 #endif
9617 return MOS_STATUS_SUCCESS;
9618 }
9619
InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)9620 MOS_STATUS CodechalVdencHevcStateG12::InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)
9621 {
9622 MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS miEnhancedConditionalBatchBufferEndParams;
9623
9624 MOS_ZeroMemory(
9625 &miEnhancedConditionalBatchBufferEndParams,
9626 sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
9627
9628 miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_resHucErrorStatusBuffer;
9629
9630 miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
9631 miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = false;
9632 miEnhancedConditionalBatchBufferEndParams.compareOperation = MAD_EQUAL_IDD;
9633
9634 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
9635 cmdBuffer,
9636 (PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)));
9637
9638 return MOS_STATUS_SUCCESS;
9639 }
9640
9641