1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_vdenc_hevc.cpp
24 //! \brief Defines base class for HEVC VDEnc encoder.
25 //!
26
27 #include "codechal_vdenc_hevc.h"
28 #if USE_CODECHAL_DEBUG_TOOL
29 #include "codechal_debug_kernel.h"
30 #endif
31
32 //!< \cond SKIP_DOXYGEN
33 const uint8_t CodechalVdencHevcState::m_estRateThreshP0[7] =
34 {
35 4, 8, 12, 16, 20, 24, 28
36 };
37
38 const uint8_t CodechalVdencHevcState::m_estRateThreshB0[7] =
39 {
40 4, 8, 12, 16, 20, 24, 28
41 };
42
43 const uint8_t CodechalVdencHevcState::m_estRateThreshI0[7] =
44 {
45 4, 8, 12, 16, 20, 24, 28
46 };
47
48 const int8_t CodechalVdencHevcState::m_instRateThreshP0[4] =
49 {
50 40, 60, 80, 120
51 };
52
53 const int8_t CodechalVdencHevcState::m_instRateThreshB0[4] =
54 {
55 35, 60, 80, 120
56 };
57
58 const int8_t CodechalVdencHevcState::m_instRateThreshI0[4] =
59 {
60 40, 60, 90, 115
61 };
62
63 const uint16_t CodechalVdencHevcState::m_startGAdjFrame[4] =
64 {
65 10, 50, 100, 150
66 };
67
68 const uint8_t CodechalVdencHevcState::m_startGAdjMult[5] =
69 {
70 1, 1, 3, 2, 1
71 };
72
73 const uint8_t CodechalVdencHevcState::m_startGAdjDiv[5] =
74 {
75 40, 5, 5, 3, 1
76 };
77
78 const uint8_t CodechalVdencHevcState::m_rateRatioThreshold[7] =
79 {
80 40, 75, 97, 103, 125, 160, 0
81 };
82
83 const uint8_t CodechalVdencHevcState::m_rateRatioThresholdQP[8] =
84 {
85 253, 254, 255, 0, 1, 2, 3, 0
86 };
87
88 const uint32_t CodechalVdencHevcState::m_hucModeCostsIFrame[] = {
89 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
90 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
91 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
92 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
93 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
94 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
95 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
96 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
97 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
98 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
99 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
100 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
101 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
102 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
103 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
104 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
105 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
106 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
107 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
108 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
109 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
110 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
111 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
112 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
113 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
114 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
115 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
116 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
117 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
118 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
119 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
120 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
121 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
122 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
123 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
124 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
125 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
126 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
127 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
128 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
129 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
130 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
131 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
132 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
133 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
134 0x00000000, 0x00000000, 0x00000000, 0x00000000
135 };
136
137 const uint32_t CodechalVdencHevcState::m_hucModeCostsPbFrame[] = {
138 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
139 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
140 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
141 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
142 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
143 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
144 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
145 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
146 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
147 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
148 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
149 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
150 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
151 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
152 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
153 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
154 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
155 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
156 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
157 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
158 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
159 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
160 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
161 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
162 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
163 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
164 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
165 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
166 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
167 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
168 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
169 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
170 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
171 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
172 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
173 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
174 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
175 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
176 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
177 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
178 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
179 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
180 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
181 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
182 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
183 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b
184 };
185
186 const uint16_t CodechalVdencHevcState::m_sadQpLambdaI[] = {
187 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004,
188 0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000E, 0x0010, 0x0012, 0x0014, 0x0016, 0x0019, 0x001C,
189 0x001F, 0x0023, 0x0027, 0x002C, 0x0032, 0x0038, 0x003E, 0x0046, 0x004F, 0x0058, 0x0063, 0x006F, 0x007D, 0x008C, 0x009D, 0x00B1,
190 0x00C6, 0x00DF, 0x00FA, 0x0118
191 };
192
193 // new table for visual quality improvement
194 const uint16_t CodechalVdencHevcState::m_sadQpLambdaI_VQI[] = {
195 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004,
196 0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D, 0x000F, 0x0011, 0x0014, 0x0017, 0x001A, 0x001E, 0x0022,
197 0x0027, 0x002D, 0x0033, 0x003B, 0x0043, 0x004D, 0x0057, 0x0064, 0x0072, 0x0082, 0x0095, 0x00A7, 0x00BB, 0x00D2, 0x00EC, 0x0109,
198 0x0129, 0x014E, 0x0177, 0x01A5
199 };
200
201 const uint16_t CodechalVdencHevcState::m_sadQpLambdaP[] = {
202 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004, 0x0005,
203 0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D, 0x000E, 0x0010, 0x0012, 0x0014, 0x0017, 0x001A, 0x001D,
204 0x0021, 0x0024, 0x0029, 0x002E, 0x0034, 0x003A, 0x0041, 0x0049, 0x0052, 0x005C, 0x0067, 0x0074, 0x0082, 0x0092, 0x00A4, 0x00B8,
205 0x00CE, 0x00E8, 0x0104, 0x0124
206 };
207
208 const uint16_t CodechalVdencHevcState::m_rdQpLambdaI[] = {
209 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0003, 0x0004, 0x0005,
210 0x0006, 0x0008, 0x000A, 0x000C, 0x000F, 0x0013, 0x0018, 0x001E, 0x0026, 0x0030, 0x003D, 0x004D, 0x0061, 0x007A, 0x009A, 0x00C2,
211 0x00F4, 0x0133, 0x0183, 0x01E8, 0x0266, 0x0306, 0x03CF, 0x04CD, 0x060C, 0x079F, 0x099A, 0x0C18, 0x0F3D, 0x1333, 0x1831, 0x1E7A,
212 0x2666, 0x3062, 0x3CF5, 0x4CCD
213 };
214
215 const uint16_t CodechalVdencHevcState::m_rdQpLambdaP[] = {
216 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0005,
217 0x0007, 0x0008, 0x000A, 0x000D, 0x0011, 0x0015, 0x001A, 0x0021, 0x002A, 0x0034, 0x0042, 0x0053, 0x0069, 0x0084, 0x00A6, 0x00D2,
218 0x0108, 0x014D, 0x01A3, 0x0210, 0x029A, 0x0347, 0x0421, 0x0533, 0x068D, 0x0841, 0x0A66, 0x0D1A, 0x1082, 0x14CD, 0x1A35, 0x2105,
219 0x299A, 0x346A, 0x4209, 0x5333
220 };
221
222 // Originial CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode table
223 const uint8_t CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode[] = {
224 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
225 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
226 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
227 0x00, 0x00, 0x00, 0x00
228 };
229
230 // New table for visual quality improvement
231 const uint8_t CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode_VQI[] = {
232 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
233 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0C, 0x12, 0x19, 0x1f, 0x25, 0x2C, 0x32, 0x38,
234 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
235 0x3F, 0x3F, 0x3F, 0x3F
236 };
237 //! \endcond
238
GetMaxAllowedSlices(uint8_t levelIdc)239 uint32_t CodechalVdencHevcState::GetMaxAllowedSlices(uint8_t levelIdc)
240 {
241 uint32_t maxAllowedNumSlices = 0;
242
243 switch (levelIdc)
244 {
245 case 10:
246 case 20:
247 maxAllowedNumSlices = 16;
248 break;
249 case 21:
250 maxAllowedNumSlices = 20;
251 break;
252 case 30:
253 maxAllowedNumSlices = 30;
254 break;
255 case 31:
256 maxAllowedNumSlices = 40;
257 break;
258 case 40:
259 case 41:
260 maxAllowedNumSlices = 75;
261 break;
262 case 50:
263 case 51:
264 case 52:
265 maxAllowedNumSlices = 200;
266 break;
267 case 60:
268 case 61:
269 case 62:
270 maxAllowedNumSlices = 600;
271 break;
272 default:
273 maxAllowedNumSlices = 0;
274 break;
275 }
276
277 return maxAllowedNumSlices;
278 }
279
SetPakPassType()280 void CodechalVdencHevcState::SetPakPassType()
281 {
282 CODECHAL_ENCODE_FUNCTION_ENTER;
283
284 // default: VDEnc+PAK pass
285 m_pakOnlyPass = false;
286
287 // BRC
288 if (m_brcEnabled)
289 {
290 // BRC with SSC, BRC without SSC
291 // BRC fast 2nd pass needed, but weighted prediction/SSC 2nd pass not needed
292 // HuC will update PAK pass type to be VDEnc+PAK if WP/SSC 2nd pass is needed
293 if (GetCurrentPass() == 1)
294 {
295 m_pakOnlyPass = true;
296 }
297 }
298
299 // CQP, ACQP, BRC
300 if (m_hevcSeqParams->SAO_enabled_flag)
301 {
302 // SAO 2nd pass is always PAK only pass
303 if (m_b2NdSaoPassNeeded && (GetCurrentPass() == m_uc2NdSaoPass))
304 {
305 m_pakOnlyPass = true;
306 }
307 }
308
309 return;
310 }
311
ComputeVDEncInitQP(int32_t & initQPIP,int32_t & initQPB)312 void CodechalVdencHevcState::ComputeVDEncInitQP(int32_t& initQPIP, int32_t& initQPB)
313 {
314 CODECHAL_ENCODE_FUNCTION_ENTER;
315
316 const float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
317 uint32_t frameSize = ((m_frameWidth * m_frameHeight * 3) >> 1);
318
319 initQPIP = (int)(1. / 1.2 * pow(10.0, (log10(frameSize * 2. / 3. * ((float)m_hevcSeqParams->FrameRate.Numerator / ((float)m_hevcSeqParams->FrameRate.Denominator * (float)m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS))) - x0) * (y1 - y0) / (x1 - x0) + y0) + 0.5);
320
321 initQPIP += 2;
322
323 int32_t gopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
324 int32_t gopB = m_hevcSeqParams->GopPicSize - 1 - gopP;
325 int32_t gopB1 = 0;
326 int32_t gopB2 = 0;
327 int32_t gopSize = 1 + gopP + gopB + gopB1 + gopB2;
328
329 if (gopSize == 1)
330 {
331 initQPIP += 12;
332 }
333 else if (gopSize < 15)
334 {
335 initQPIP += ((14 - gopSize) >> 1);
336 }
337
338 initQPIP = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPIP);
339 initQPIP--;
340
341 if (initQPIP < 0)
342 {
343 initQPIP = 1;
344 }
345
346 initQPB = ((initQPIP + initQPIP) * 563 >> 10) + 1;
347 initQPB = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPB);
348
349 if (gopSize > 300) //if intra frame is not inserted frequently
350 {
351 initQPIP -= 8;
352 initQPB -= 8;
353 }
354 else
355 {
356 initQPIP -= 2;
357 initQPB -= 2;
358 }
359
360 initQPIP = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPIP);
361 initQPB = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPB);
362 }
363
StoreHuCStatus2Register(PMOS_COMMAND_BUFFER cmdBuffer)364 MOS_STATUS CodechalVdencHevcState::StoreHuCStatus2Register(PMOS_COMMAND_BUFFER cmdBuffer)
365 {
366 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
367
368 CODECHAL_ENCODE_FUNCTION_ENTER;
369
370 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
371
372 // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
373 MHW_MI_STORE_DATA_PARAMS storeDataParams;
374 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
375 storeDataParams.pOsResource = &m_resHucStatus2Buffer;
376 storeDataParams.dwResourceOffset = 0;
377 storeDataParams.dwValue = m_hucInterface->GetHucStatus2ImemLoadedMask();
378 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
379
380 // Store HUC_STATUS2 register
381 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
382 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
383 storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
384 storeRegParams.dwOffset = sizeof(uint32_t);
385 storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(m_vdboxIndex)->hucStatus2RegOffset;
386 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
387
388 return eStatus;
389 }
390
HuCBrcInitReset()391 MOS_STATUS CodechalVdencHevcState::HuCBrcInitReset()
392 {
393 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
394
395 CODECHAL_ENCODE_FUNCTION_ENTER;
396
397 MOS_COMMAND_BUFFER cmdBuffer;
398 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
399
400 if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) )
401 {
402 // Send command buffer header at the beginning (OS dependent)
403 bool requestFrameTracking = m_singleTaskPhaseSupported ?
404 m_firstTaskInPhase : 0;
405 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
406 }
407
408 // load kernel from WOPCM into L2 storage RAM
409 MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
410 MOS_ZeroMemory(&imemParams, sizeof(imemParams));
411 imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
412
413 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
414
415 // pipe mode select
416 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
417 pipeModeSelectParams.Mode = m_mode;
418 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
419
420 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
421
422 // set HuC DMEM param
423 MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
424 MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
425 dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
426 dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
427 dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
428 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
429
430 MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
431 MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
432 virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
433 virtualAddrParams.regionParams[0].isWritable = true;
434 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
435
436 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcDummyStreamObject(&cmdBuffer));
437
438 // Store HUC_STATUS2 register bit 6 before HUC_Start command
439 // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
440 // (HUC_Start command with last start bit set).
441 CODECHAL_DEBUG_TOOL(
442 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
443 )
444
445 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
446
447 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
448
449 // wait Huc completion (use HEVC bit for now)
450 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
451 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
452 vdPipeFlushParams.Flags.bFlushHEVC = 1;
453 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
454 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
455
456 // Flush the engine to ensure memory written out
457 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
458 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
459 flushDwParams.bVideoPipelineCacheInvalidate = true;
460 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
461
462 if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd))
463 {
464 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
465 }
466
467 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
468
469 if (!m_singleTaskPhaseSupported)
470 {
471 bool renderingFlags = m_videoContextUsesNullHw;
472
473 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
474 &cmdBuffer,
475 CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
476 "ENC")));
477
478 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
479 }
480
481 CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
482 return eStatus;
483 }
484
SetupBRCROIStreamIn(PMOS_RESOURCE streamIn,PMOS_RESOURCE deltaQpBuffer)485 MOS_STATUS CodechalVdencHevcState::SetupBRCROIStreamIn(PMOS_RESOURCE streamIn, PMOS_RESOURCE deltaQpBuffer)
486 {
487 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
488
489 CODECHAL_ENCODE_FUNCTION_ENTER;
490
491 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
492 CODECHAL_ENCODE_CHK_NULL_RETURN(deltaQpBuffer);
493
494 MOS_LOCK_PARAMS lockFlags;
495 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
496 lockFlags.WriteOnly = true;
497
498 PDeltaQpForROI deltaQpData = (PDeltaQpForROI)m_osInterface->pfnLockResource(
499 m_osInterface,
500 deltaQpBuffer,
501 &lockFlags);
502 CODECHAL_ENCODE_CHK_NULL_RETURN(deltaQpData);
503
504 MOS_ZeroMemory(deltaQpData, m_deltaQpRoiBufferSize);
505
506 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
507 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
508 uint32_t deltaQpBufWidth = (MOS_ALIGN_CEIL(m_frameWidth, 32) / 32);
509 uint32_t deltaQpBufHeight = (MOS_ALIGN_CEIL(m_frameHeight, 32) / 32);
510 bool cu64Align = true;
511
512 if ((m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
513 m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
514 m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR) &&
515 m_encodeParams.bMbQpDataEnabled)
516 {
517 cu64Align = false;
518
519 MOS_LOCK_PARAMS LockFlagsReadOnly;
520 MOS_ZeroMemory(&LockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
521 LockFlagsReadOnly.ReadOnly = true;
522
523 auto pInputDataGfx = (PDeltaQpForROI)m_osInterface->pfnLockResource(
524 m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource), &LockFlagsReadOnly);
525
526 CODECHAL_ENCODE_CHK_NULL_RETURN(pInputDataGfx);
527
528 for (uint32_t curY = 0; curY < deltaQpBufHeight; curY++)
529 {
530 for (uint32_t curX = 0; curX < deltaQpBufWidth; curX++)
531 {
532 uint32_t iMB = curY * deltaQpBufHeight + curX;
533 deltaQpData[iMB] = *(pInputDataGfx + m_encodeParams.psMbQpDataSurface->dwPitch * curY + curX);
534 }
535 }
536
537 m_osInterface->pfnUnlockResource(
538 m_osInterface,
539 &(m_encodeParams.psMbQpDataSurface->OsResource));
540 }
541 else
542 {
543 for (auto i = m_hevcPicParams->NumROI - 1; i >= 0; i--)
544 {
545 //Check if the region is with in the borders
546 uint16_t top = (uint16_t)CodecHal_Clip3(0, (deltaQpBufHeight - 1), m_hevcPicParams->ROI[i].Top);
547 uint16_t bottom = (uint16_t)CodecHal_Clip3(0, deltaQpBufHeight, m_hevcPicParams->ROI[i].Bottom);
548 uint16_t left = (uint16_t)CodecHal_Clip3(0, (deltaQpBufWidth - 1), m_hevcPicParams->ROI[i].Left);
549 uint16_t right = (uint16_t)CodecHal_Clip3(0, deltaQpBufWidth, m_hevcPicParams->ROI[i].Right);
550
551 //Check if all the sides of ROI regions are aligned to 64CU
552 if ((top % 2 == 1) || (bottom % 2 == 1) || (left % 2 == 1) || (right % 2 == 1))
553 {
554 cu64Align = false;
555 }
556
557 SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, (uint8_t)i, deltaQpData);
558 }
559 }
560
561 m_osInterface->pfnUnlockResource(
562 m_osInterface,
563 deltaQpBuffer);
564
565 uint8_t* data = (uint8_t*) m_osInterface->pfnLockResource(
566 m_osInterface,
567 streamIn,
568 &lockFlags);
569 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
570
571 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
572 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
573 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
574 streaminDataParams.maxCuSize = (cu64Align) ? 3 : 2;
575 switch (m_hevcSeqParams->TargetUsage)
576 {
577 case 1:
578 case 4:
579 streaminDataParams.numMergeCandidateCu64x64 = 4;
580 streaminDataParams.numMergeCandidateCu32x32 = 3;
581 streaminDataParams.numMergeCandidateCu16x16 = 2;
582 streaminDataParams.numMergeCandidateCu8x8 = 1;
583 streaminDataParams.numImePredictors = m_imgStateImePredictors;
584 break;
585 case 7:
586 streaminDataParams.numMergeCandidateCu64x64 = 2;
587 streaminDataParams.numMergeCandidateCu32x32 = 2;
588 streaminDataParams.numMergeCandidateCu16x16 = 2;
589 streaminDataParams.numMergeCandidateCu8x8 = 0;
590 streaminDataParams.numImePredictors = 4;
591 break;
592 }
593
594 int32_t streamInNumCUs = streamInWidth * streamInHeight;
595 for (auto i = 0; i < streamInNumCUs; i++)
596 {
597 SetStreaminDataPerLcu(&streaminDataParams, data+(i*64));
598 }
599
600 m_osInterface->pfnUnlockResource(
601 m_osInterface,
602 streamIn);
603
604 return eStatus;
605 }
606
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)607 void CodechalVdencHevcState::SetBrcRoiDeltaQpMap(
608 uint32_t streamInWidth,
609 uint32_t top,
610 uint32_t bottom,
611 uint32_t left,
612 uint32_t right,
613 uint8_t regionId,
614 PDeltaQpForROI deltaQpMap)
615 {
616 CODECHAL_ENCODE_FUNCTION_ENTER;
617
618 for (auto y = top; y < bottom; y++)
619 {
620 for (auto x = left; x < right; x++)
621 {
622 uint32_t offset = 0, xyOffset = 0;
623 StreaminZigZagToLinearMap(streamInWidth, x, y, &offset, &xyOffset);
624
625 (deltaQpMap + (offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
626 }
627 }
628 }
629
ProcessRoiDeltaQp()630 void CodechalVdencHevcState::ProcessRoiDeltaQp()
631 {
632 CODECHAL_ENCODE_FUNCTION_ENTER;
633
634 // Intialize ROIDistinctDeltaQp to be min expected delta qp, setting to -128
635 // Check if forceQp is needed or not
636 // forceQp is enabled if there are greater than 3 distinct delta qps or if the deltaqp is beyond range (-8, 7)
637
638 for (auto k = 0; k < m_maxNumROI; k++)
639 {
640 m_hevcPicParams->ROIDistinctDeltaQp[k] = -128;
641 }
642
643 int32_t numQp = 0;
644 for (int32_t i = 0; i < m_hevcPicParams->NumROI; i++)
645 {
646 bool dqpNew = true;
647
648 //Get distinct delta Qps among all ROI regions, index 0 having the lowest delta qp
649 int32_t k = numQp - 1;
650 for (; k >= 0; k--)
651 {
652 if (m_hevcPicParams->ROI[i].PriorityLevelOrDQp == m_hevcPicParams->ROIDistinctDeltaQp[k] || m_hevcPicParams->ROI[i].PriorityLevelOrDQp == 0)
653 {
654 dqpNew = false;
655 break;
656 }
657 else if (m_hevcPicParams->ROI[i].PriorityLevelOrDQp < m_hevcPicParams->ROIDistinctDeltaQp[k])
658 {
659 continue;
660 }
661 else
662 {
663 break;
664 }
665 }
666
667 if (dqpNew)
668 {
669 for (int32_t j = numQp - 1; (j >= k + 1 && j >= 0); j--)
670 {
671 m_hevcPicParams->ROIDistinctDeltaQp[j + 1] = m_hevcPicParams->ROIDistinctDeltaQp[j];
672 }
673 m_hevcPicParams->ROIDistinctDeltaQp[k + 1] = m_hevcPicParams->ROI[i].PriorityLevelOrDQp;
674 numQp++;
675 }
676 }
677
678 //Set the ROI DeltaQp to zero for remaining array elements
679 for (auto k = numQp; k < m_maxNumROI; k++)
680 {
681 m_hevcPicParams->ROIDistinctDeltaQp[k] = 0;
682 }
683
684 m_vdencNativeROIEnabled = !(numQp > m_maxNumNativeROI || m_hevcPicParams->ROIDistinctDeltaQp[0] < -8 || m_hevcPicParams->ROIDistinctDeltaQp[numQp - 1] > 7);
685 }
686
SetupROIStreamIn(PMOS_RESOURCE streamIn)687 MOS_STATUS CodechalVdencHevcState::SetupROIStreamIn(PMOS_RESOURCE streamIn)
688 {
689 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
690
691 CODECHAL_ENCODE_FUNCTION_ENTER;
692
693 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
694
695 MOS_LOCK_PARAMS lockFlags;
696 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
697 lockFlags.WriteOnly = true;
698
699 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
700 m_osInterface,
701 streamIn,
702 &lockFlags);
703 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
704
705 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
706 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
707 int32_t streamInNumCUs = streamInWidth * streamInHeight;
708
709 MOS_ZeroMemory(data, streamInNumCUs * 64);
710
711 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
712
713 //ROI higher priority for smaller index.
714 bool cu64Align = true;
715 for (int32_t i = m_hevcPicParams->NumROI - 1; i >= 0; i--)
716 {
717
718 //Check if the region is with in the borders
719 uint16_t top = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->ROI[i].Top);
720 uint16_t bottom = (uint16_t)CodecHal_Clip3(0, streamInHeight, m_hevcPicParams->ROI[i].Bottom);
721 uint16_t left = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->ROI[i].Left);
722 uint16_t right = (uint16_t)CodecHal_Clip3(0, streamInWidth, m_hevcPicParams->ROI[i].Right);
723
724 //Check if all the sides of ROI regions are aligned to 64CU
725 if ((top % 2 == 1) || (bottom % 2 == 1) || (left % 2 == 1) || (right % 2 == 1))
726 {
727 cu64Align = false;
728 }
729
730 // For native ROI, determine Region ID based on distinct delta Qps and set ROI control
731 uint32_t roiCtrl = 0;
732 for (auto j = 0; j < m_maxNumNativeROI; j++)
733 {
734 if (m_hevcPicParams->ROIDistinctDeltaQp[j] == m_hevcPicParams->ROI[i].PriorityLevelOrDQp)
735 {
736 //All four 16x16 blocks within the 32x32 blocks should share the same region ID j
737 roiCtrl = j + 1;
738 for (auto k = 0; k < 3; k++)
739 {
740 roiCtrl = roiCtrl << 2;
741 roiCtrl = roiCtrl + j + 1;
742 }
743 break;
744 }
745 }
746 // Calculate ForceQp
747 int8_t forceQp = (int8_t)CodecHal_Clip3(10, 51, m_hevcPicParams->QpY + m_hevcPicParams->ROI[i].PriorityLevelOrDQp + m_hevcSliceParams->slice_qp_delta);
748
749 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
750 streaminDataParams.setQpRoiCtrl = true;
751 if (m_vdencNativeROIEnabled)
752 {
753 streaminDataParams.roiCtrl = (uint8_t)roiCtrl;
754 }
755 else
756 {
757 streaminDataParams.forceQp[0] = forceQp;
758 streaminDataParams.forceQp[1] = forceQp;
759 streaminDataParams.forceQp[2] = forceQp;
760 streaminDataParams.forceQp[3] = forceQp;
761 }
762
763 SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, data);
764 }
765
766 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
767 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
768 streaminDataParams.maxCuSize = (cu64Align) ? 3 : 2;
769 switch (m_hevcSeqParams->TargetUsage)
770 {
771 case 1:
772 case 4:
773 streaminDataParams.numMergeCandidateCu64x64 = 4;
774 streaminDataParams.numMergeCandidateCu32x32 = 3;
775 streaminDataParams.numMergeCandidateCu16x16 = 2;
776 streaminDataParams.numMergeCandidateCu8x8 = 1;
777 streaminDataParams.numImePredictors = m_imgStateImePredictors;
778 break;
779 case 7:
780 streaminDataParams.numMergeCandidateCu64x64 = 2;
781 streaminDataParams.numMergeCandidateCu32x32 = 2;
782 streaminDataParams.numMergeCandidateCu16x16 = 2;
783 streaminDataParams.numMergeCandidateCu8x8 = 0;
784 streaminDataParams.numImePredictors = 4;
785 break;
786 }
787
788 for (auto i = 0; i < streamInNumCUs; i++)
789 {
790 SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
791 }
792
793 m_osInterface->pfnUnlockResource(
794 m_osInterface,
795 streamIn);
796
797 return eStatus;
798 }
SetupMbQpStreamIn(PMOS_RESOURCE streamIn)799 MOS_STATUS CodechalVdencHevcState::SetupMbQpStreamIn(PMOS_RESOURCE streamIn)
800 {
801 CODECHAL_ENCODE_FUNCTION_ENTER;
802
803 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
804 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
805
806 MOS_LOCK_PARAMS LockFlags;
807 MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
808 LockFlags.WriteOnly = true;
809
810 auto dataGfx = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface,
811 streamIn,
812 &LockFlags);
813 CODECHAL_ENCODE_CHK_NULL_RETURN(dataGfx);
814 MOS_SURFACE surfInfo = {};
815 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, streamIn, &surfInfo));
816
817 uint32_t uiSize = surfInfo.dwSize;
818 uint32_t uiAlign = 64;
819 auto data = (uint8_t*)MOS_AllocMemory(uiSize + uiAlign);
820 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
821
822 auto dataBase = (uint8_t*)((((uint64_t)(data) + uiAlign - 1) / uiAlign) * uiAlign);
823
824 MOS_SecureMemcpy(dataBase, uiSize, dataGfx, uiSize);
825
826 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
827 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
828 int32_t streamInNumCUs = streamInWidth * streamInHeight;
829
830 MOS_LOCK_PARAMS LockFlagsReadOnly;
831 MOS_ZeroMemory(&LockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
832 LockFlagsReadOnly.ReadOnly = true;
833
834 auto pInputDataGfx = (uint8_t*)m_osInterface->pfnLockResource(
835 m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource),
836 &LockFlagsReadOnly);
837 if (pInputDataGfx == nullptr)
838 {
839 MOS_SafeFreeMemory(data);
840 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer from LockResource!");
841 return MOS_STATUS_NULL_POINTER;
842 }
843
844 eStatus = m_osInterface->pfnGetResourceInfo(
845 m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource),
846 &surfInfo);
847 if (eStatus != MOS_STATUS_SUCCESS)
848 {
849 MOS_SafeFreeMemory(data);
850 CODECHAL_ENCODE_ASSERTMESSAGE("Get psMbQpDataSurface ResourceInfo Failed!");
851 return eStatus;
852 }
853
854 auto pInputData = (int8_t*)MOS_AllocMemory(surfInfo.dwSize);
855 if (pInputData == nullptr)
856 {
857 MOS_SafeFreeMemory(data);
858 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer from MOS_AllocMemory!");
859 return MOS_STATUS_NULL_POINTER;
860 }
861
862 MOS_SecureMemcpy(pInputData, surfInfo.dwSize, pInputDataGfx, surfInfo.dwSize);
863
864 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams = {};
865
866 for (uint32_t h = 0; h < streamInHeight; h++)
867 {
868 for (uint32_t w = 0; w < streamInWidth; w++)
869 {
870 //Calculate X Y Offset for the zig zag scan with in each 64x64 LCU
871 //dwOffset gives the 64 LCU row
872 // uint32_t Offset = StreamInWidth * (h/2) * 2;
873 // uint32_t YOffset = (h % 2) * 2;
874 // uint32_t XOffset = 2 * (w/2 * 2) + w % 2;
875
876 // (pData + (Offset + XOffset + YOffset))->DW7.QpEnable = 0xf;
877 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_0 = ForceQp;
878 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_1 = ForceQp;
879 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_2 = ForceQp;
880 // (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_3 = ForceQp;
881 streaminDataParams.setQpRoiCtrl = true;
882
883 streaminDataParams.forceQp[0] = (int8_t) ( pInputData[(h * 2) * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2)]);
884 streaminDataParams.forceQp[1] = (int8_t) ( pInputData[(h * 2) * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2 + 1)]);
885 streaminDataParams.forceQp[2] = (int8_t) ( pInputData[(h * 2 + 1) * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2)]);
886 streaminDataParams.forceQp[3] = (int8_t) ( pInputData[(h * 2 + 1) * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2 + 1)]);
887
888 SetStreaminDataPerRegion(streamInWidth, h, h+1, w, w+1, &streaminDataParams, dataBase);
889
890 }
891 }
892
893 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
894 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
895 streaminDataParams.maxCuSize = 3;
896 switch (m_hevcSeqParams->TargetUsage)
897 {
898 case 1:
899 case 4:
900 streaminDataParams.numMergeCandidateCu64x64 = 4;
901 streaminDataParams.numMergeCandidateCu32x32 = 3;
902 streaminDataParams.numMergeCandidateCu16x16 = 2;
903 streaminDataParams.numMergeCandidateCu8x8 = 1;
904 streaminDataParams.numImePredictors = m_imgStateImePredictors;
905 break;
906 case 7:
907 streaminDataParams.numMergeCandidateCu64x64 = 2;
908 streaminDataParams.numMergeCandidateCu32x32 = 2;
909 streaminDataParams.numMergeCandidateCu16x16 = 2;
910 streaminDataParams.numMergeCandidateCu8x8 = 0;
911 streaminDataParams.numImePredictors = 4;
912 break;
913 }
914
915 for (auto i = 0; i < streamInNumCUs; i++)
916 {
917 SetStreaminDataPerLcu(&streaminDataParams, dataBase + (i * 64));
918 }
919
920 MOS_SecureMemcpy(dataGfx, uiSize, dataBase, uiSize);
921 MOS_SafeFreeMemory(data);
922 MOS_SafeFreeMemory(pInputData);
923
924 m_osInterface->pfnUnlockResource(
925 m_osInterface,
926 &(m_encodeParams.psMbQpDataSurface->OsResource));
927 m_osInterface->pfnUnlockResource(
928 m_osInterface,
929 streamIn);
930 return eStatus;
931 }
932
StreaminSetDirtyRectRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t maxcu,void * streaminData)933 void CodechalVdencHevcState::StreaminSetDirtyRectRegion(
934 uint32_t streamInWidth,
935 uint32_t top,
936 uint32_t bottom,
937 uint32_t left,
938 uint32_t right,
939 uint8_t maxcu,
940 void* streaminData)
941 {
942 CODECHAL_ENCODE_FUNCTION_ENTER;
943
944 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
945 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
946 streaminDataParams.maxTuSize = 3;
947 streaminDataParams.maxCuSize = maxcu;
948 streaminDataParams.puTypeCtrl = 0;
949
950 switch (m_hevcSeqParams->TargetUsage)
951 {
952 case 1:
953 case 4:
954 streaminDataParams.numMergeCandidateCu64x64 = 4;
955 streaminDataParams.numMergeCandidateCu32x32 = 3;
956 streaminDataParams.numMergeCandidateCu16x16 = 2;
957 streaminDataParams.numMergeCandidateCu8x8 = 1;
958 streaminDataParams.numImePredictors = m_imgStateImePredictors;
959 break;
960 case 7:
961 streaminDataParams.numMergeCandidateCu64x64 = 2;
962 streaminDataParams.numMergeCandidateCu32x32 = 2;
963 streaminDataParams.numMergeCandidateCu16x16 = 2;
964 streaminDataParams.numMergeCandidateCu8x8 = 0;
965 streaminDataParams.numImePredictors = 4;
966 break;
967 }
968
969 SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, streaminData);
970 }
971
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)972 void CodechalVdencHevcState::SetStreaminDataPerRegion(
973 uint32_t streamInWidth,
974 uint32_t top,
975 uint32_t bottom,
976 uint32_t left,
977 uint32_t right,
978 PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
979 void* streaminData)
980 {
981 uint8_t* data = (uint8_t*)streaminData;
982
983 for (auto y = top; y < bottom; y++)
984 {
985 for (auto x = left; x < right; x++)
986 {
987 //Calculate X Y for the zig zag scan
988 uint32_t offset = 0, xyOffset = 0;
989 StreaminZigZagToLinearMap(streamInWidth, x, y, &offset, &xyOffset);
990
991 SetStreaminDataPerLcu(streaminParams, data + (offset + xyOffset) * 64);
992 }
993 }
994 }
995
StreaminZigZagToLinearMap(uint32_t streamInWidth,uint32_t x,uint32_t y,uint32_t * offset,uint32_t * xyOffset)996 void CodechalVdencHevcState::StreaminZigZagToLinearMap(
997 uint32_t streamInWidth,
998 uint32_t x,
999 uint32_t y,
1000 uint32_t* offset,
1001 uint32_t* xyOffset)
1002 {
1003 CODECHAL_ENCODE_FUNCTION_ENTER;
1004
1005 *offset = streamInWidth * y;
1006 uint32_t yOffset = 0;
1007 uint32_t xOffset = 2 * x;
1008
1009 //Calculate X Y Offset for the zig zag scan with in each 64x64 LCU
1010 //dwOffset gives the 64 LCU row
1011 if (y % 2)
1012 {
1013 *offset = streamInWidth * (y - 1);
1014 yOffset = 2;
1015 }
1016
1017 if (x % 2)
1018 {
1019 xOffset = (2 * x) - 1;
1020 }
1021
1022 *xyOffset = xOffset + yOffset;
1023 }
1024
StreaminSetBorderNon64AlignStaticRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,void * streaminData)1025 void CodechalVdencHevcState::StreaminSetBorderNon64AlignStaticRegion(
1026 uint32_t streamInWidth,
1027 uint32_t top,
1028 uint32_t bottom,
1029 uint32_t left,
1030 uint32_t right,
1031 void* streaminData)
1032 {
1033 CODECHAL_ENCODE_FUNCTION_ENTER;
1034
1035 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
1036 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
1037 streaminDataParams.maxTuSize = 3;
1038 streaminDataParams.maxCuSize = 2;
1039 streaminDataParams.numMergeCandidateCu64x64 = 0; // MergeCand setting for Force MV
1040 streaminDataParams.numMergeCandidateCu32x32 = 1; // this is always set to 1
1041 streaminDataParams.numMergeCandidateCu16x16 = 0;
1042 streaminDataParams.numMergeCandidateCu8x8 = 0;
1043 streaminDataParams.numImePredictors = 0;
1044 streaminDataParams.puTypeCtrl = 0xff; //Force MV
1045
1046 SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, streaminData);
1047 }
1048
//!
//! \brief    Populate the VDEnc stream-in surface for dirty-rectangle encoding
//! \details  Steps, in order (later writes overwrite earlier ones for the same entry):
//!           1. Lock and clear the stream-in surface.
//!           2. Set every entry to the Force-MV defaults (puTypeCtrl 0xff, one merge
//!              candidate at the 64x64 level, no IME predictors).
//!           3. Patch the right and bottom frame borders when the frame size is not a
//!              multiple of 32 or the 32-aligned size differs from the 64-aligned size.
//!           4. For each dirty rectangle, from the last to the first: rows/columns on an
//!              odd edge get dirty settings with maxcu 2 and the neighbouring row/column
//!              outside the rectangle gets the non-64-aligned static settings; the
//!              remaining interior gets dirty settings with maxcu 3.
//! \param    [in] streamIn
//!           Stream-in resource to lock and fill
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalVdencHevcState::SetupDirtyRectStreamIn(PMOS_RESOURCE streamIn)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);

    MOS_LOCK_PARAMS lockFlags;
    MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
    lockFlags.WriteOnly = true;

    uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
        m_osInterface,
        streamIn,
        &lockFlags);
    CODECHAL_ENCODE_CHK_NULL_RETURN(data);

    // Stream-in dimensions in 32-pixel units over the 64-aligned frame; each entry is 64 bytes
    uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
    uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
    int32_t streamInNumCUs = streamInWidth * streamInHeight;

    MOS_ZeroMemory(data, streamInNumCUs * 64);

    // Default settings written to every entry before borders and dirty rectangles are applied
    MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
    MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
    streaminDataParams.maxTuSize = 3;
    streaminDataParams.maxCuSize = 3;
    streaminDataParams.numImePredictors = 0;
    streaminDataParams.puTypeCtrl = 0xff; //Force MV
    streaminDataParams.numMergeCandidateCu64x64 = 1; // MergeCand setting for Force MV
    streaminDataParams.numMergeCandidateCu32x32 = 0; // 0 here; StreaminSetBorderNon64AlignStaticRegion() uses 1 at this level instead
    streaminDataParams.numMergeCandidateCu16x16 = 0;
    streaminDataParams.numMergeCandidateCu8x8 = 0;

    for (auto i = 0; i < streamInNumCUs; i++)
    {
        SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
    }

    // Frame size in 32-pixel units when only aligned up to 32 (not 64)
    uint32_t streamInWidthNo64Align = (MOS_ALIGN_CEIL(m_frameWidth, 32) / 32);
    uint32_t streamInHeightNo64Align = (MOS_ALIGN_CEIL(m_frameHeight, 32) / 32);

    bool bActualWidth32Align = (m_frameWidth % 32) == 0;
    bool bActualHeight32Align = (m_frameHeight % 32) == 0;

    // Set the static region when the width is not 64 CU aligned.
    if (streamInWidthNo64Align != streamInWidth || !bActualWidth32Align)
    {
        // Columns from the last 32-aligned column up to the 64-aligned right edge, full height
        auto border_top = 0;
        auto border_bottom = streamInHeight;
        auto border_left = streamInWidthNo64Align - 1;
        auto border_right = streamInWidth;

        if (!bActualWidth32Align)
        {
            // Width not a multiple of 32: the border columns get dirty settings
            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 3, data);
            if (streamInWidthNo64Align == streamInWidth)
            {
                // 32- and 64-aligned widths coincide: the column to the left of the border
                // gets the static settings
                StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left-1, border_right-1, data);
            }
        }
        else
        {
            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
        }
    }

    // Set the static region when the height is not 64 CU aligned.
    if (streamInHeightNo64Align != streamInHeight || !bActualHeight32Align)
    {
        // Rows from the last 32-aligned row down to the 64-aligned bottom edge, full width
        auto border_top = streamInHeightNo64Align - 1;
        auto border_bottom = streamInHeight;
        auto border_left = 0;
        auto border_right = streamInWidth;

        if (!bActualHeight32Align)
        {
            // Height not a multiple of 32: the border rows get dirty settings
            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 3, data);
            if (streamInHeightNo64Align == streamInHeight)
            {
                // 32- and 64-aligned heights coincide: the row above the border gets the
                // static settings
                StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top - 1, border_bottom - 1, border_left, border_right, data);
            }
        }
        else
        {
            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
        }
    }

    // Dirty rectangles are processed from the last to the first, so lower-indexed
    // rectangles are written last.
    for (int i = m_hevcPicParams->NumDirtyRects - 1; i >= 0; i--)
    {
        //Check if the region is with in the borders
        // Bottom and Right are clamped first and then incremented, i.e. turned into exclusive bounds
        uint16_t top = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->pDirtyRect[i].Top);
        uint16_t bottom = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->pDirtyRect[i].Bottom) + 1;
        uint16_t left = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->pDirtyRect[i].Left);
        uint16_t right = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->pDirtyRect[i].Right) + 1;

        // Interior of the rectangle; shrunk below for every edge that is odd
        auto dirtyrect_top = top;
        auto dirtyrect_bottom = bottom;
        auto dirtyrect_left = left;
        auto dirtyrect_right = right;

        //If the border of the DirtyRect is not aligned with 64 CU, different setting in the border
        if (top % 2 != 0)
        {
            // First row of the rectangle: dirty settings with maxcu 2
            auto border_top = top;
            auto border_bottom = top + 1;
            auto border_left = left;
            auto border_right = right;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // Row just above the rectangle, widened to even columns: static settings
            border_top = top - 1;
            border_bottom = top;
            border_left = (left % 2 != 0) ? left - 1 : left;
            border_right = (right % 2 != 0) ? right + 1 : right;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);

            dirtyrect_top = top + 1;
        }

        if (bottom % 2 != 0)
        {
            // Last row of the rectangle: dirty settings with maxcu 2
            auto border_top = bottom - 1;
            auto border_bottom = bottom;
            auto border_left = left;
            auto border_right = right;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // Row just below the rectangle, widened to even columns: static settings
            border_top = bottom;
            border_bottom = bottom + 1;
            border_left = (left % 2 != 0) ? left - 1 : left;
            border_right = (right % 2 != 0) ? right + 1 : right;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);

            dirtyrect_bottom = bottom - 1;
        }

        if (left % 2 != 0)
        {
            // First column of the rectangle: dirty settings with maxcu 2
            auto border_top = top;
            auto border_bottom = bottom;
            auto border_left = left;
            auto border_right = left + 1;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // Column just left of the rectangle, extended to even rows: static settings
            border_top = (top % 2 != 0) ? top - 1 : top;
            border_bottom = (bottom % 2 != 0) ? bottom + 1 : bottom;
            border_left = left - 1;
            border_right = left;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);

            dirtyrect_left = left + 1;
        }

        if (right % 2 != 0)
        {
            // Last column of the rectangle: dirty settings with maxcu 2
            auto border_top = top;
            auto border_bottom = bottom;
            auto border_left = right - 1;
            auto border_right = right;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // Column just right of the rectangle, extended to even rows: static settings
            border_top = (top % 2 != 0) ? top - 1 : top;
            border_bottom = (bottom % 2 != 0) ? bottom + 1 : bottom;
            border_left = right;
            border_right = right + 1;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
            dirtyrect_right = right - 1;
        }

        // Remaining interior of the rectangle: dirty settings with maxcu 3
        StreaminSetDirtyRectRegion(streamInWidth, dirtyrect_top, dirtyrect_bottom, dirtyrect_left, dirtyrect_right, 3, data);
    }

    m_osInterface->pfnUnlockResource(
        m_osInterface,
        streamIn);

    return eStatus;
}
1237
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)1238 MOS_STATUS CodechalVdencHevcState::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
1239 {
1240 CODECHAL_ENCODE_FUNCTION_ENTER;
1241
1242 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1243
1244 int32_t currentPass = GetCurrentPass();
1245 if (currentPass < 0)
1246 {
1247 eStatus = MOS_STATUS_INVALID_PARAMETER;
1248 return eStatus;
1249 }
1250 // Add Virtual addr
1251 MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
1252 virtualAddrParams->regionParams[0].presRegion = &m_vdencBrcHistoryBuffer; // Region 0 - History Buffer (Input/Output)
1253 virtualAddrParams->regionParams[0].isWritable = true;
1254 virtualAddrParams->regionParams[1].presRegion =
1255 (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats); // Region 1 VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
1256 virtualAddrParams->regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer; // Region 2 PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
1257 virtualAddrParams->regionParams[3].presRegion = &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]; // Region 3 - Input SLB Buffer (Input)
1258 virtualAddrParams->regionParams[4].presRegion = &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx]; // Region 4 - Constant Data (Input)
1259 virtualAddrParams->regionParams[5].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource; // Region 5 - Output SLB Buffer (Output)
1260 virtualAddrParams->regionParams[5].isWritable = true;
1261 virtualAddrParams->regionParams[6].presRegion = &m_dataFromPicsBuffer; // Region 6 - Data Buffer of Current and Reference Pictures for Weighted Prediction (Input/Output)
1262 virtualAddrParams->regionParams[6].isWritable = true;
1263 virtualAddrParams->regionParams[7].presRegion = &m_resLcuBaseAddressBuffer; // Region 7 Slice Stat Streamout (Input)
1264 virtualAddrParams->regionParams[8].presRegion =
1265 (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo); // Region 8 - PAK Information (Input)
1266 virtualAddrParams->regionParams[9].presRegion = &m_resVdencStreamInBuffer[m_currRecycledBufIdx]; // Region 9 - Streamin Buffer for ROI (Input)
1267 virtualAddrParams->regionParams[10].presRegion = &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]; // Region 10 - Delta QP Buffer for ROI (Input)
1268 virtualAddrParams->regionParams[11].presRegion = &m_vdencOutputROIStreaminBuffer; // Region 11 - Streamin Buffer for ROI (Output)
1269 virtualAddrParams->regionParams[11].isWritable = true;
1270
1271 // region 15 always in clear
1272 virtualAddrParams->regionParams[15].presRegion = &m_vdencBrcDbgBuffer; // Region 15 - Debug Buffer (Output)
1273 virtualAddrParams->regionParams[15].isWritable = true;
1274
1275 return eStatus;
1276 }
1277
//!
//! \brief    Build the HuC BRC update workload in the command buffer
//! \details  Commands are added in this order: optional prolog, HuC IMEM state (low delay
//!           or regular BRC update kernel), HuC pipe mode select, DMEM state, virtual
//!           address state, dummy stream object, HuC status2 report, HuC start, VD pipeline
//!           flush, MI flush, then the HUC_STATUS mask/value pair stored into
//!           m_resPakMmioBuffer. When single task phase is not supported the command
//!           buffer is also submitted here.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalVdencHevcState::HuCBrcUpdate()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));

    // The prolog is sent for every call without single task phase; with it, only for the
    // first task in the phase and only when BRC init is not pending (m_brcInit false).
    if (!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit))
    {
        // Send command buffer header at the beginning (OS dependent)
        bool requestFrameTracking = m_singleTaskPhaseSupported ?
            m_firstTaskInPhase : 0;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
    }

    // currentPass indexes the per-pass buffers below, so a negative value is rejected
    int32_t currentPass = GetCurrentPass();
    if (currentPass < 0)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        return eStatus;
    }
    CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));

    // load kernel from WOPCM into L2 storage RAM
    MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
    MOS_ZeroMemory(&imemParams, sizeof(imemParams));

    if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) // Low Delay BRC
    {
        imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
    }
    else
    {
        imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));

    // pipe mode select
    // NOTE(review): only Mode is assigned here; the other fields keep whatever the
    // structure's own initialization provides - confirm against its declaration.
    MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
    pipeModeSelectParams.Mode = m_mode;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));

    // DMEM set
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());

    // DMEM buffer is selected per recycled buffer index and per pass; length is padded to a cache line
    MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
    MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
    dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
    dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
    dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));

    // Set Const Data buffer
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());

    // Add Virtual addr
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcDummyStreamObject(&cmdBuffer));

    // Store HUC_STATUS2 register bit 6 before HUC_Start command
    // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
    // (HUC_Start command with last start bit set).
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
    )

    CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));

    // wait Huc completion (use HEVC bit for now)
    MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
    MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
    vdPipeFlushParams.Flags.bFlushHEVC = 1;
    vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));

    // Flush the engine to ensure memory written out
    MHW_MI_FLUSH_DW_PARAMS flushDwParams;
    MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
    flushDwParams.bVideoPipelineCacheInvalidate = true;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));

    // Write HUC_STATUS mask: DW1 (mask value)
    MHW_MI_STORE_DATA_PARAMS storeDataParams;
    MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
    storeDataParams.pOsResource = &m_resPakMmioBuffer;
    storeDataParams.dwResourceOffset = sizeof(uint32_t);
    storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));

    // store HUC_STATUS register: DW0 (actual value)
    CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
    auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
    MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
    MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
    storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
    storeRegParams.dwOffset = 0;
    storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));

    // DW0 & DW1 will considered together for conditional batch buffer end cmd later
    if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd))
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
    }

    // HuC Input
    CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));

    // Without single task phase this workload is submitted on its own, right here
    if (!m_singleTaskPhaseSupported)
    {
        bool renderingFlags = m_videoContextUsesNullHw;

        CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
            &cmdBuffer,
            CODECHAL_MEDIA_STATE_BRC_UPDATE,
            "ENC")));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
    }

    // HuC Output
    CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));

    return eStatus;
}
1412
HuCBrcDummyStreamObject(PMOS_COMMAND_BUFFER cmdBuffer)1413 MOS_STATUS CodechalVdencHevcState::HuCBrcDummyStreamObject(PMOS_COMMAND_BUFFER cmdBuffer)
1414 {
1415 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1416
1417 CODECHAL_ENCODE_FUNCTION_ENTER;
1418
1419 // pass dummy buffer by Ind Obj Addr command
1420 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjParams;
1421 MOS_ZeroMemory(&indObjParams, sizeof(indObjParams));
1422 indObjParams.presDataBuffer = &m_vdencBrcDbgBuffer;
1423 indObjParams.dwDataSize = 1;
1424 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucIndObjBaseAddrStateCmd(cmdBuffer, &indObjParams));
1425
1426 MHW_VDBOX_HUC_STREAM_OBJ_PARAMS streamObjParams;
1427 MOS_ZeroMemory(&streamObjParams, sizeof(streamObjParams));
1428 streamObjParams.dwIndStreamInLength = 1;
1429 streamObjParams.bHucProcessing = true;
1430 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStreamObjectCmd(cmdBuffer, &streamObjParams));
1431
1432 return eStatus;
1433 }
1434
SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)1435 void CodechalVdencHevcState::SetVdencPipeModeSelectParams(
1436 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
1437 {
1438 pipeModeSelectParams.ucVdencBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
1439 pipeModeSelectParams.bPakThresholdCheckEnable = m_hevcSeqParams->SliceSizeControl;
1440 pipeModeSelectParams.ChromaType = m_hevcSeqParams->chroma_format_idc;
1441 pipeModeSelectParams.bTlbPrefetchEnable = true;
1442 pipeModeSelectParams.Format = m_rawSurfaceToPak->Format;
1443
1444 // can be enabled by reg key (disabled by default)
1445 pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = m_vdencPakObjCmdStreamOutEnabled;
1446
1447 int32_t currentPass = GetCurrentPass();
1448
1449 // needs to be enabled for 1st pass in multi-pass case
1450 // This bit is ignored if PAK only second pass is enabled.
1451 if ((currentPass == 0) && (currentPass != m_numPasses))
1452 {
1453 pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = true;
1454 }
1455 }
1456
SetVdencSurfaceStateParams(MHW_VDBOX_SURFACE_PARAMS & srcSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & reconSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & ds8xSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & ds4xSurfaceParams)1457 void CodechalVdencHevcState::SetVdencSurfaceStateParams(
1458 MHW_VDBOX_SURFACE_PARAMS& srcSurfaceParams,
1459 MHW_VDBOX_SURFACE_PARAMS& reconSurfaceParams,
1460 MHW_VDBOX_SURFACE_PARAMS& ds8xSurfaceParams,
1461 MHW_VDBOX_SURFACE_PARAMS& ds4xSurfaceParams)
1462 {
1463 // VDENC_SRC_SURFACE_STATE parameters
1464 srcSurfaceParams.dwActualWidth = ((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
1465 srcSurfaceParams.dwActualHeight = ((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
1466 srcSurfaceParams.bColorSpaceSelection = (m_hevcSeqParams->InputColorSpace == ECOLORSPACE_P709) ? 1 : 0;
1467
1468 // VDENC_REF_SURFACE_STATE parameters
1469 reconSurfaceParams.dwActualWidth = srcSurfaceParams.dwActualWidth;
1470 reconSurfaceParams.dwActualHeight = srcSurfaceParams.dwActualHeight;
1471 reconSurfaceParams.dwReconSurfHeight = m_rawSurfaceToPak->dwHeight;
1472
1473 // VDENC_DS_REF_SURFACE_STATE parameters
1474 MOS_ZeroMemory(&ds8xSurfaceParams, sizeof(ds8xSurfaceParams));
1475 ds8xSurfaceParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1476 ds8xSurfaceParams.psSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1477 ds8xSurfaceParams.ucSurfaceStateId = CODECHAL_MFX_DSRECON_SURFACE_ID;
1478 ds8xSurfaceParams.dwActualWidth = ds8xSurfaceParams.psSurface->dwWidth;
1479 ds8xSurfaceParams.dwActualHeight = ds8xSurfaceParams.psSurface->dwHeight;
1480
1481 MOS_ZeroMemory(&ds4xSurfaceParams, sizeof(ds4xSurfaceParams));
1482 ds4xSurfaceParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1483 ds4xSurfaceParams.psSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1484 ds4xSurfaceParams.ucSurfaceStateId = CODECHAL_MFX_DSRECON_SURFACE_ID;
1485 ds4xSurfaceParams.dwActualWidth = ds4xSurfaceParams.psSurface->dwWidth;
1486 ds4xSurfaceParams.dwActualHeight = ds4xSurfaceParams.psSurface->dwHeight;
1487 }
1488
SetVdencPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS & pipeBufAddrParams)1489 void CodechalVdencHevcState::SetVdencPipeBufAddrParams(
1490 MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
1491 {
1492 pipeBufAddrParams = {};
1493
1494 //set MMC flag
1495 if (m_mmcState->IsMmcEnabled())
1496 {
1497 pipeBufAddrParams.bMmcEnabled = true;
1498 }
1499
1500 pipeBufAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1501 pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
1502 pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1503 pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1504 pipeBufAddrParams.presVdencStreamOutBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);
1505 pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
1506 pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencIntraRowStoreScratch);
1507 pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
1508 pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
1509 pipeBufAddrParams.dwNumRefIdxL1ActiveMinus1 = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
1510
1511 if (m_vdencStreamInEnabled)
1512 {
1513 bool useBrcInDeltaQpMap = m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
1514 m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
1515 m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR;
1516 if (m_vdencHucUsed && ((m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled) ||
1517 (useBrcInDeltaQpMap && m_encodeParams.bMbQpDataEnabled)))
1518 {
1519 pipeBufAddrParams.presVdencStreamInBuffer = &m_vdencOutputROIStreaminBuffer;
1520 }
1521 else if (m_lookaheadPass)
1522 {
1523 pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[0];
1524 }
1525 else
1526 {
1527 pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
1528 }
1529 }
1530
1531 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
1532 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
1533 {
1534 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
1535
1536 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
1537 {
1538 // L0 references
1539 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
1540 pipeBufAddrParams.presVdencReferences[refIdx] = &m_refList[refPicIdx]->sRefReconBuffer.OsResource;
1541
1542 // 4x/8x DS surface for VDEnc
1543 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
1544 pipeBufAddrParams.presVdenc4xDsSurface[refIdx] = &(m_trackedBuf->Get4xDsReconSurface(scaledIdx))->OsResource;
1545 pipeBufAddrParams.presVdenc8xDsSurface[refIdx] = &(m_trackedBuf->Get8xDsReconSurface(scaledIdx))->OsResource;
1546 }
1547 }
1548
1549 if (!m_lowDelay)
1550 {
1551 PCODEC_PICTURE l1RefFrameList = m_hevcSliceParams->RefPicList[LIST_1];
1552 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
1553 {
1554 CODEC_PICTURE refPic = l1RefFrameList[refIdx];
1555
1556 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
1557 {
1558 // L1 references
1559 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
1560 pipeBufAddrParams.presVdencReferences[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
1561 &m_refList[refPicIdx]->sRefReconBuffer.OsResource;
1562
1563 // 4x/8x DS surface for VDEnc
1564 uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
1565 pipeBufAddrParams.presVdenc4xDsSurface[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
1566 &(m_trackedBuf->Get4xDsReconSurface(scaledIdx))->OsResource;
1567 pipeBufAddrParams.presVdenc8xDsSurface[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
1568 &(m_trackedBuf->Get8xDsReconSurface(scaledIdx))->OsResource;
1569 }
1570 }
1571 }
1572
1573 uint8_t idxForTempMVP = 0xFF;
1574
1575 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
1576 {
1577 uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
1578 idxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
1579 }
1580
1581 if (idxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
1582 {
1583 // Temporal reference MV index is invalid and so disable the temporal MVP
1584 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
1585 }
1586 else
1587 {
1588 pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(idxForTempMVP);
1589 }
1590
1591 // Disable temporal MVP for LDB frames which only refer to I frame
1592 if (m_pictureCodingType == I_TYPE)
1593 {
1594 m_currGopIFramePOC = m_hevcPicParams->CurrPicOrderCnt;
1595 }
1596
1597 if (m_hevcSeqParams->sps_temporal_mvp_enable_flag == 0 && m_hevcSliceParams->slice_temporal_mvp_enable_flag == 1)
1598 {
1599 CODECHAL_ENCODE_NORMALMESSAGE("Attention: temporal MVP flag is inconsistent between seq and slice.");
1600 m_hevcSliceParams->slice_temporal_mvp_enable_flag = 0;
1601 }
1602
1603 if (!m_hevcPicParams->pps_curr_pic_ref_enabled_flag && m_lowDelay && m_hevcSliceParams->num_ref_idx_l0_active_minus1 == 0
1604 && m_currGopIFramePOC != -1 && m_hevcSliceParams->slice_temporal_mvp_enable_flag != 0)
1605 {
1606 auto idx = m_picIdx[m_hevcSliceParams->RefPicList[0][0].FrameIdx].ucPicIdx;
1607
1608 if (m_refList[idx]->iFieldOrderCnt[0] == m_currGopIFramePOC)
1609 {
1610 m_hevcSliceParams->slice_temporal_mvp_enable_flag = 0;
1611 }
1612 }
1613
1614 }
1615
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)1616 void CodechalVdencHevcState::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
1617 {
1618 CODECHAL_ENCODE_FUNCTION_ENTER;
1619
1620 CodechalEncodeHevcBase::SetHcpSliceStateCommonParams(sliceStateParams);
1621
1622 sliceStateParams.bVdencInUse = true;
1623 sliceStateParams.bVdencHucInUse = m_hevcVdencAcqpEnabled || m_brcEnabled;
1624 sliceStateParams.bWeightedPredInUse = m_hevcVdencWeightedPredEnabled;
1625 sliceStateParams.pVdencBatchBuffer = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx];
1626
1627 // This bit disables Top intra Reference pixel fetch in VDENC mode.
1628 // In PAK only second pass, this bit should be set to one.
1629 // "IntraRefFetchDisable" in HCP SLICE STATE should be set to 0 in first pass and 1 in subsequent passes.
1630 // For dynamic slice, 2nd pass is still VDEnc + PAK pass, not PAK only pass.
1631 sliceStateParams.bIntraRefFetchDisable = m_pakOnlyPass;
1632 }
1633
AddHcpPakInsertSliceHeader(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)1634 MOS_STATUS CodechalVdencHevcState::AddHcpPakInsertSliceHeader(
1635 PMOS_COMMAND_BUFFER cmdBuffer,
1636 PMHW_BATCH_BUFFER batchBuffer,
1637 PMHW_VDBOX_HEVC_SLICE_STATE params)
1638 {
1639 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1640
1641 CODECHAL_ENCODE_FUNCTION_ENTER;
1642
1643 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1644 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pBsBuffer);
1645 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
1646
1647 if (cmdBuffer == nullptr && batchBuffer == nullptr)
1648 {
1649 CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
1650 return MOS_STATUS_NULL_POINTER;
1651 }
1652
1653 // Insert slice header
1654 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
1655 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
1656 pakInsertObjectParams.bLastHeader = true;
1657 pakInsertObjectParams.bEmulationByteBitsInsert = true;
1658 pakInsertObjectParams.pBatchBufferForPakSlices = batchBuffer;
1659
1660 // App does the slice header packing, set the skip count passed by the app
1661 pakInsertObjectParams.uiSkipEmulationCheckCount = params->uiSkipEmulationCheckCount;
1662 pakInsertObjectParams.pBsBuffer = params->pBsBuffer;
1663 pakInsertObjectParams.dwBitSize = params->dwLength;
1664 pakInsertObjectParams.dwOffset = params->dwOffset;
1665 pakInsertObjectParams.bVdencInUse = params->bVdencInUse;
1666
1667 // For HEVC VDEnc Dynamic Slice
1668 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
1669 if (m_hevcSeqParams->SliceSizeControl)
1670 {
1671 pakInsertObjectParams.bLastHeader = false;
1672 pakInsertObjectParams.bEmulationByteBitsInsert = false;
1673 pakInsertObjectParams.dwBitSize = hevcSlcParams->BitLengthSliceHeaderStartingPortion;
1674 pakInsertObjectParams.bResetBitstreamStartingPos = true;
1675 }
1676
1677 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
1678 cmdBuffer,
1679 &pakInsertObjectParams));
1680
1681 if (m_hevcSeqParams->SliceSizeControl)
1682 {
1683 // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
1684 pakInsertObjectParams.bLastHeader = true;
1685 pakInsertObjectParams.dwBitSize = params->dwLength - hevcSlcParams->BitLengthSliceHeaderStartingPortion;
1686 pakInsertObjectParams.dwOffset += ((hevcSlcParams->BitLengthSliceHeaderStartingPortion + 7) / 8); // Skips the first 5 bytes which is Start Code + Nal Unit Header
1687 pakInsertObjectParams.bResetBitstreamStartingPos = true;
1688 pakInsertObjectParams.bVdencInUse = params->bVdencInUse;
1689 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
1690 cmdBuffer,
1691 &pakInsertObjectParams));
1692 }
1693
1694 return eStatus;
1695 }
1696
AddHcpWeightOffsetStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)1697 MOS_STATUS CodechalVdencHevcState::AddHcpWeightOffsetStateCmd(
1698 PMOS_COMMAND_BUFFER cmdBuffer,
1699 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
1700 {
1701 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1702
1703 CODECHAL_ENCODE_FUNCTION_ENTER;
1704
1705 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1706 CODECHAL_ENCODE_CHK_NULL_RETURN(hevcSlcParams);
1707
1708 MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
1709 MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
1710
1711 for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
1712 {
1713 // Luma, Chroma offset
1714 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
1715 {
1716 hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)hevcSlcParams->luma_offset[k][i];
1717 // Cb, Cr
1718 for (auto j = 0; j < 2; j++)
1719 {
1720 hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)hevcSlcParams->chroma_offset[k][i][j];
1721 }
1722 }
1723
1724 // Luma Weight
1725 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1726 &hcpWeightOffsetParams.LumaWeights[k],
1727 sizeof(hcpWeightOffsetParams.LumaWeights[k]),
1728 &hevcSlcParams->delta_luma_weight[k],
1729 sizeof(hevcSlcParams->delta_luma_weight[k])));
1730
1731 // Chroma Weight
1732 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1733 &hcpWeightOffsetParams.ChromaWeights[k],
1734 sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
1735 &hevcSlcParams->delta_chroma_weight[k],
1736 sizeof(hevcSlcParams->delta_chroma_weight[k])));
1737 }
1738
1739 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
1740 {
1741 hcpWeightOffsetParams.ucList = LIST_0;
1742 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(cmdBuffer, nullptr, &hcpWeightOffsetParams));
1743 }
1744
1745 if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
1746 {
1747 hcpWeightOffsetParams.ucList = LIST_1;
1748 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(cmdBuffer, nullptr, &hcpWeightOffsetParams));
1749 }
1750
1751 return eStatus;
1752 }
1753
AddVdencWeightOffsetStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)1754 MOS_STATUS CodechalVdencHevcState::AddVdencWeightOffsetStateCmd(
1755 PMOS_COMMAND_BUFFER cmdBuffer,
1756 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
1757 {
1758 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1759
1760 CODECHAL_ENCODE_FUNCTION_ENTER;
1761
1762 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1763 CODECHAL_ENCODE_CHK_NULL_RETURN(hevcSlcParams);
1764
1765 MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
1766 MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
1767
1768 vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
1769
1770 if (vdencWeightOffsetParams.bWeightedPredEnabled)
1771 {
1772 vdencWeightOffsetParams.dwDenom = 1 << (hevcSlcParams->luma_log2_weight_denom);
1773
1774 // Luma offset
1775 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
1776 {
1777 vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)hevcSlcParams->luma_offset[0][i];
1778 vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)hevcSlcParams->luma_offset[1][i];
1779 }
1780
1781 // Luma Weight
1782 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
1783 &vdencWeightOffsetParams.LumaWeights[0],
1784 sizeof(vdencWeightOffsetParams.LumaWeights[0]),
1785 &hevcSlcParams->delta_luma_weight[0],
1786 sizeof(hevcSlcParams->delta_luma_weight[0])),
1787 "Failed to copy luma weight 0 memory.");
1788
1789 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
1790 &vdencWeightOffsetParams.LumaWeights[1],
1791 sizeof(vdencWeightOffsetParams.LumaWeights[1]),
1792 &hevcSlcParams->delta_luma_weight[1],
1793 sizeof(hevcSlcParams->delta_luma_weight[1])),
1794 "Failed to copy luma weight 1 memory.");
1795 }
1796
1797 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
1798 cmdBuffer,
1799 nullptr,
1800 &vdencWeightOffsetParams));
1801
1802 return eStatus;
1803 }
1804
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)1805 MOS_STATUS CodechalVdencHevcState::AddVdencWalkerStateCmd(
1806 PMOS_COMMAND_BUFFER cmdBuffer,
1807 PMHW_VDBOX_HEVC_SLICE_STATE params)
1808 {
1809 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1810
1811 CODECHAL_ENCODE_FUNCTION_ENTER;
1812
1813 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1814 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1815
1816 MHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams;
1817 vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1818 vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
1819 vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
1820 vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
1821 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
1822
1823 return eStatus;
1824 }
1825
ReadBrcPakStats(PMOS_COMMAND_BUFFER cmdBuffer)1826 MOS_STATUS CodechalVdencHevcState::ReadBrcPakStats(
1827 PMOS_COMMAND_BUFFER cmdBuffer)
1828 {
1829 CODECHAL_ENCODE_FUNCTION_ENTER;
1830
1831 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1832
1833 uint32_t offset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
1834 m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset
1835 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
1836
1837 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
1838 readBrcPakStatsParams.pHwInterface = m_hwInterface;
1839 readBrcPakStatsParams.presBrcPakStatisticBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
1840 readBrcPakStatsParams.presStatusBuffer = &m_encodeStatusBuf.resStatusBuffer;
1841 readBrcPakStatsParams.dwStatusBufNumPassesOffset = offset;
1842 readBrcPakStatsParams.ucPass = (uint8_t) GetCurrentPass();
1843 readBrcPakStatsParams.VideoContext = m_videoContext;
1844
1845 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatistics(
1846 cmdBuffer,
1847 &readBrcPakStatsParams));
1848
1849 return eStatus;
1850 }
1851
StoreVdencStatistics(PMOS_COMMAND_BUFFER cmdBuffer)1852 MOS_STATUS CodechalVdencHevcState::StoreVdencStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
1853 {
1854 CODECHAL_ENCODE_FUNCTION_ENTER;
1855
1856 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1857
1858 uint32_t offset = sizeof(CodechalVdencHevcLaStats) * m_currLaDataIdx;
1859 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1860 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1861 miCpyMemMemParams.presSrc = m_resVdencStatsBuffer; // 8X8 Normalized intra CU count is in m_resVdencStatsBuffer DW1
1862 miCpyMemMemParams.dwSrcOffset = 4;
1863 miCpyMemMemParams.presDst = &m_vdencLaStatsBuffer;
1864 miCpyMemMemParams.dwDstOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, intraCuCount);
1865 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1866
1867 return eStatus;
1868 }
1869
StoreLookaheadStatistics(PMOS_COMMAND_BUFFER cmdBuffer)1870 MOS_STATUS CodechalVdencHevcState::StoreLookaheadStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
1871 {
1872 CODECHAL_ENCODE_FUNCTION_ENTER;
1873
1874 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1875
1876 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
1877 {
1878 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1879 eStatus = MOS_STATUS_INVALID_PARAMETER;
1880 return eStatus;
1881 }
1882
1883 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
1884 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
1885 uint32_t offset = sizeof(CodechalVdencHevcLaStats) * m_currLaDataIdx;
1886
1887 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
1888 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1889 miStoreRegMemParams.presStoreBuffer = &m_vdencLaStatsBuffer;
1890 miStoreRegMemParams.dwOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, frameByteCount);
1891 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
1892 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
1893
1894 // Calculate header size including LCU header
1895 uint32_t headerBitSize = 0;
1896 for (uint32_t i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
1897 {
1898 headerBitSize += m_nalUnitParams[i]->uiSize * 8;
1899 }
1900 for (uint32_t i = 0; i < m_numSlices; i++)
1901 {
1902 headerBitSize += m_slcData[i].BitSize;
1903 }
1904
1905 // Store to headerBitCount in CodechalVdencHevcLaStats
1906 MHW_MI_STORE_DATA_PARAMS storeDataParams;
1907 storeDataParams.pOsResource = &m_vdencLaStatsBuffer;
1908 storeDataParams.dwResourceOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, headerBitCount);
1909 storeDataParams.dwValue = headerBitSize;
1910 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
1911
1912 auto mmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex);
1913 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1914 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
1915 MHW_MI_ATOMIC_PARAMS atomicParams;
1916
1917 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1918 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
1919 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
1920 // VCS_GPR0_Lo = LCUHdrBits
1921 miLoadRegMemParams.presStoreBuffer = &m_resFrameStatStreamOutBuffer; // LCUHdrBits is in m_resFrameStatStreamOutBuffer DW4
1922 miLoadRegMemParams.dwOffset = 4 * sizeof(uint32_t);
1923 miLoadRegMemParams.dwRegister = mmioRegistersMfx->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
1924 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
1925
1926 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1927 cmdBuffer,
1928 &flushDwParams));
1929
1930 // frame headerBitCount += LCUHdrBits
1931 atomicParams.pOsResource = &m_vdencLaStatsBuffer;
1932 atomicParams.dwResourceOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, headerBitCount);
1933 atomicParams.dwDataSize = sizeof(uint32_t);
1934 atomicParams.Operation = MHW_MI_ATOMIC_ADD;
1935 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
1936 cmdBuffer,
1937 &atomicParams));
1938
1939 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreVdencStatistics(cmdBuffer));
1940
1941 return eStatus;
1942 }
1943
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)1944 MOS_STATUS CodechalVdencHevcState::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
1945 {
1946 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1947
1948 CODECHAL_ENCODE_FUNCTION_ENTER;
1949
1950 // Report slice size to app only when dynamic slice is enabled
1951 if (!m_hevcSeqParams->SliceSizeControl)
1952 {
1953 return eStatus;
1954 }
1955
1956 MOS_LOCK_PARAMS lockFlags;
1957 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1958 lockFlags.WriteOnly = true;
1959
1960 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2); // encodeStatus is offset by 2 DWs in the resource
1961 uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1962
1963 if (IsFirstPass())
1964 {
1965 // Create/ Initialize slice report buffer once per frame, to be used across passes
1966 if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
1967 {
1968 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1969 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1970 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1971 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1972 allocParamsForBufferLinear.Format = Format_Buffer;
1973 allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
1974
1975 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1976 m_osInterface,
1977 &allocParamsForBufferLinear,
1978 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
1979 "Failed to create HEVC VDEnc Slice Report Buffer ");
1980 }
1981
1982 // Clear slice size structure to be sent in EncodeStatusReport buffer
1983 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
1984 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1985 MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
1986 m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
1987
1988 // Set slice size pointer in slice size structure
1989 MHW_MI_FLUSH_DW_PARAMS miFlushDwParams;
1990 MOS_ZeroMemory(&miFlushDwParams, sizeof(miFlushDwParams));
1991 miFlushDwParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
1992 miFlushDwParams.dwResourceOffset = CODECHAL_OFFSETOF(EncodeStatusSliceReport, pSliceSize) + baseOffset + m_encodeStatusBuf.dwSliceReportOffset;
1993 miFlushDwParams.dwDataDW1 = (uint32_t)((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF);
1994 miFlushDwParams.dwDataDW2 = (uint32_t)(((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF00000000) >> 32);
1995 miFlushDwParams.bQWordEnable = 1;
1996 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1997 cmdBuffer,
1998 &miFlushDwParams));
1999 }
2000
2001 // Copy Slize size data buffer from PAK to be sent back to App
2002 CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
2003 &m_resLcuBaseAddressBuffer, 0, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], 0, sizeOfSliceSizesBuffer));
2004
2005 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
2006 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
2007 miCpyMemMemParams.presSrc = &m_resFrameStatStreamOutBuffer; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
2008 miCpyMemMemParams.dwSrcOffset = 0;
2009 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
2010 miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset; // Slice size overflow is at DW0 EncodeStatusSliceReport
2011 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
2012
2013
2014 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
2015 miCpyMemMemParams.presSrc = m_resSliceCountBuffer; // Number of slice sizes are stored in this buffer. Updated at runtime
2016 miCpyMemMemParams.dwSrcOffset = 0;
2017 miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer;
2018 miCpyMemMemParams.dwDstOffset = baseOffset + m_encodeStatusBuf.dwSliceReportOffset + 1; // Num slices is located at DW1 EncodeStatusSliceReport
2019 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
2020
2021 return eStatus;
2022 }
2023
CopyDataBlock(PMOS_COMMAND_BUFFER cmdBuffer,PMOS_RESOURCE sourceSurface,uint32_t sourceOffset,PMOS_RESOURCE destSurface,uint32_t destOffset,uint32_t copySize)2024 MOS_STATUS CodechalVdencHevcState::CopyDataBlock(
2025 PMOS_COMMAND_BUFFER cmdBuffer,
2026 PMOS_RESOURCE sourceSurface,
2027 uint32_t sourceOffset,
2028 PMOS_RESOURCE destSurface,
2029 uint32_t destOffset,
2030 uint32_t copySize)
2031 {
2032 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2033
2034 CODECHAL_ENCODE_FUNCTION_ENTER;
2035
2036 CodechalHucStreamoutParams hucStreamOutParams;
2037 MOS_ZeroMemory(&hucStreamOutParams, sizeof(hucStreamOutParams));
2038
2039 // Ind Obj Addr command
2040 hucStreamOutParams.dataBuffer = sourceSurface;
2041 hucStreamOutParams.dataSize = copySize + sourceOffset;
2042 hucStreamOutParams.dataOffset = MOS_ALIGN_FLOOR(sourceOffset, CODECHAL_PAGE_SIZE);
2043 hucStreamOutParams.streamOutObjectBuffer = destSurface;
2044 hucStreamOutParams.streamOutObjectSize = copySize + destOffset;
2045 hucStreamOutParams.streamOutObjectOffset = MOS_ALIGN_FLOOR(destOffset, CODECHAL_PAGE_SIZE);
2046
2047 // Stream object params
2048 hucStreamOutParams.indStreamInLength = copySize;
2049 hucStreamOutParams.inputRelativeOffset = sourceOffset - hucStreamOutParams.dataOffset;
2050 hucStreamOutParams.outputRelativeOffset = destOffset - hucStreamOutParams.streamOutObjectOffset;
2051
2052
2053 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->PerformHucStreamOut(
2054 &hucStreamOutParams,
2055 cmdBuffer));
2056
2057 // wait Huc completion (use HEVC bit for now)
2058 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
2059 MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
2060 vdPipeFlushParams.Flags.bFlushHEVC = 1;
2061 vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
2062 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
2063
2064 // Flush the engine to ensure memory written out
2065 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2066 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2067 flushDwParams.bVideoPipelineCacheInvalidate = true;
2068 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2069
2070 return eStatus;
2071 }
2072
ExecutePictureLevel()2073 MOS_STATUS CodechalVdencHevcState::ExecutePictureLevel()
2074 {
2075 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2076
2077 CODECHAL_ENCODE_FUNCTION_ENTER;
2078
2079 PerfTagSetting perfTag;
2080 perfTag.Value = 0;
2081 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2082 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2083 perfTag.PictureCodingType = m_pictureCodingType;
2084 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2085
2086 if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) \
2087 {
2088 CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
2089 eStatus = MOS_STATUS_INVALID_PARAMETER;
2090 return eStatus;
2091 }
2092
2093 CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
2094
2095 if (!m_singleTaskPhaseSupportedInPak)
2096 {
2097 // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
2098 m_firstTaskInPhase = true;
2099 m_lastTaskInPhase = true;
2100 }
2101
2102 // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
2103 SetPakPassType();
2104
2105 bool pakOnlyMultipassEnable;
2106
2107 pakOnlyMultipassEnable = m_pakOnlyPass;
2108
2109 bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (GetCurrentPass() == 1) && !m_pakOnlyPass;
2110
2111 uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
2112
2113 m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
2114 CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
2115
2116 // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
2117 PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
2118 for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
2119 {
2120 CODEC_PICTURE refPic = l0RefFrameList[refIdx];
2121
2122 if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2123 {
2124 uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2125 m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
2126 }
2127 }
2128
2129 // clean-up per VDBOX semaphore memory
2130 int32_t currentPass = GetCurrentPass();
2131
2132 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerSetConstData(
2133 m_osInterface,
2134 m_miInterface,
2135 m_vdencInterface,
2136 m_hevcSeqParams,
2137 m_hevcPicParams,
2138 m_hevcSliceParams,
2139 m_pakOnlyPass,
2140 m_hevcVdencAcqpEnabled,
2141 m_brcEnabled,
2142 m_vdencStreamInEnabled,
2143 m_vdencNativeROIEnabled,
2144 m_brcAdaptiveRegionBoostEnable,
2145 m_hevcVdencRoundingEnabled,
2146 panicEnabled,
2147 currentPass));
2148
2149 // Send HuC BRC Init/ Update only on first pipe.
2150 if (m_vdencHucUsed)
2151 {
2152 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerExecute(true, &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
2153
2154 if (!m_singleTaskPhaseSupported)
2155 {
2156 //Reset earlier set PAK perf tag
2157 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2158
2159 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2160 perfTag.Value = 0;
2161 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2162 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2163 perfTag.PictureCodingType = m_pictureCodingType;
2164 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2165 }
2166 m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2167
2168 // Invoke BRC init/reset FW
2169 if (m_brcInit || m_brcReset)
2170 {
2171 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2172 }
2173
2174 if (!m_singleTaskPhaseSupported)
2175 {
2176 //Reset performance buffer used for BRC init
2177 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2178 // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2179 perfTag.Value = 0;
2180 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2181 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE;
2182 perfTag.PictureCodingType = m_pictureCodingType;
2183 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2184 }
2185
2186 // Invoke BRC update FW
2187 CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2188 m_brcInit = m_brcReset = false;
2189 if (!m_singleTaskPhaseSupported)
2190 {
2191 //reset performance buffer used for BRC update
2192 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2193 }
2194 }
2195 else
2196 {
2197 ConstructBatchBufferHuCCQP(&m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource);
2198 }
2199
2200 MOS_COMMAND_BUFFER cmdBuffer;
2201 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2202
2203 if (!m_singleTaskPhaseSupported)
2204 {
2205 //PAK Perf Tag
2206 perfTag.Value = 0;
2207 perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2208 perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2209 perfTag.PictureCodingType = m_pictureCodingType;
2210 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2211 }
2212
2213 if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) )
2214 {
2215 // Send command buffer header at the beginning (OS dependent)
2216 // frame tracking tag is only added in the last command buffer header
2217 bool requestFrameTracking = m_singleTaskPhaseSupported ?
2218 m_firstTaskInPhase :
2219 m_lastTaskInPhase;
2220
2221 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2222 }
2223
2224 // ACQP + SSC, ACQP + WP, BRC, BRC + SSC, BRC + WP
2225 // 2nd pass for SSC, WP, BRC needs conditional batch buffer end cmd, which is decided by HUC_STATUS output from HuC
2226 if (currentPass && m_vdencHuCConditional2ndPass && (currentPass != m_uc2NdSaoPass))
2227 {
2228 MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2229
2230 // Insert conditional batch buffer end
2231 MOS_ZeroMemory(
2232 &miConditionalBatchBufferEndParams,
2233 sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2234
2235 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2236 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2237 &m_resPakMmioBuffer;
2238
2239 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2240 &cmdBuffer,
2241 &miConditionalBatchBufferEndParams));
2242
2243 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2244 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2245 uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2246
2247 // Write back the HCP image control register for RC6 may clean it out
2248 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2249 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2250 miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2251 miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2252 miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2253 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2254
2255 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2256 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2257 miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2258 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2259 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2260 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2261
2262 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2263 miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2264 miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2265 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2266 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2267 }
2268
2269 if (!currentPass && m_osInterface->bTagResourceSync)
2270 {
2271 // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2272 // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2273 // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2274 // as long as Dec/VP/Enc won't depend on this PAK so soon.
2275
2276 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2277
2278 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2279 m_osInterface,
2280 globalGpuContextSyncTagBuffer));
2281 CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2282
2283 MHW_MI_STORE_DATA_PARAMS params;
2284 params.pOsResource = globalGpuContextSyncTagBuffer;
2285 params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2286 uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2287 params.dwValue = (value > 0) ? (value - 1) : 0;
2288 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms));
2289 }
2290
2291 if (!m_lookaheadPass || m_swLaMode)
2292 {
2293 CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2294 }
2295
2296 MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2297 SetHcpSrcSurfaceParams(srcSurfaceParams);
2298
2299 MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
2300 SetHcpReconSurfaceParams(reconSurfaceParams);
2301
2302 *m_pipeBufAddrParams = {};
2303 SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2304 m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2305 m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2306 m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams, &cmdBuffer);
2307
2308 SetHcpPipeModeSelectParams(*m_pipeModeSelectParams);
2309
2310 // HuC modifies HCP pipe mode select command, when 2nd pass SAO is required
2311 if (m_vdencHucUsed && m_b2NdSaoPassNeeded)
2312 {
2313 // current location to add cmds in 2nd level batch buffer
2314 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2315 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2316 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2317
2318 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2319
2320 // save offset for next 2nd level batch buffer usage
2321 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2322 }
2323 else
2324 {
2325 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2326 }
2327
2328 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams));
2329
2330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams));
2331
2332 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2333
2334 MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2335 SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2336 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2337
2338 MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2339 SetHcpQmStateParams(fqmParams, qmParams);
2340 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2341 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2342
2343 SetVdencPipeModeSelectParams(*m_pipeModeSelectParams);
2344 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2345
2346 MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];
2347 SetVdencSurfaceStateParams(srcSurfaceParams, reconSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2348 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams));
2349 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &reconSurfaceParams));
2350 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
2351
2352 SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2353 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2354
2355 MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2356 SetHcpPicStateParams(picStateParams);
2357
2358 if (m_vdencHucUsed)
2359 {
2360 // 2nd level batch buffer
2361 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2362 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2363
2364 // save offset for next 2nd level batch buffer usage
2365 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2366 }
2367 else
2368 {
2369 // current location to add cmds in 2nd level batch buffer
2370 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2371 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2372 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2373
2374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2375 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2376 }
2377
2378 // Send HEVC_VP9_RDOQ_STATE command
2379 if (m_hevcRdoqEnabled)
2380 {
2381 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2382 }
2383
2384 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2385
2386 return eStatus;
2387 }
2388
SendHwSliceEncodeCommand(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)2389 MOS_STATUS CodechalVdencHevcState::SendHwSliceEncodeCommand(
2390 PMOS_COMMAND_BUFFER cmdBuffer,
2391 PMHW_VDBOX_HEVC_SLICE_STATE params)
2392 {
2393 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2394
2395 CODECHAL_ENCODE_FUNCTION_ENTER;
2396
2397 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2398 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
2399 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pHevcPicIdx);
2400 CODECHAL_ENCODE_CHK_NULL_RETURN(params->presDataBuffer);
2401 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSeqParams);
2402 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
2403 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
2404 CODECHAL_ENCODE_CHK_NULL_RETURN(params->pBsBuffer);
2405 CODECHAL_ENCODE_CHK_NULL_RETURN(params->ppNalUnitParams);
2406
2407 PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
2408 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
2409
2410 // VDENC does not use batch buffer for slice state
2411 // add HCP_REF_IDX command
2412 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpRefIdxCmd(cmdBuffer, nullptr, params));
2413
2414 if (params->bVdencHucInUse)
2415 {
2416 // 2nd level batch buffer
2417 PMHW_BATCH_BUFFER secondLevelBatchBufferUsed = params->pVdencBatchBuffer;
2418 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(cmdBuffer, secondLevelBatchBufferUsed));
2419 }
2420 else
2421 {
2422 // Weighted Prediction
2423 // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
2424 // If zero, then this command is not issued.
2425 if (params->bWeightedPredInUse)
2426 {
2427 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpWeightOffsetStateCmd(cmdBuffer, hevcSlcParams));
2428 }
2429
2430 // add HEVC Slice state commands
2431 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(cmdBuffer, params));
2432
2433 // add HCP_PAK_INSERT_OBJECTS command
2434 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPakInsertNALUs(cmdBuffer, params->pVdencBatchBuffer, params));
2435
2436 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPakInsertSliceHeader(cmdBuffer, params->pVdencBatchBuffer, params));
2437
2438 // Send VDENC_WEIGHT_OFFSETS_STATE command
2439 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddVdencWeightOffsetStateCmd(cmdBuffer, hevcSlcParams));
2440 }
2441
2442 // Send VDENC_WALKER_STATE command
2443 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddVdencWalkerStateCmd(cmdBuffer, params));
2444
2445 return eStatus;
2446 }
2447
ExecuteSliceLevel()2448 MOS_STATUS CodechalVdencHevcState::ExecuteSliceLevel()
2449 {
2450 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2451
2452 CODECHAL_ENCODE_FUNCTION_ENTER;
2453
2454 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBatchBufferForPakSlices());
2455
2456 MOS_COMMAND_BUFFER cmdBuffer;
2457 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2458
2459 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceStateParams);
2460 SetHcpSliceStateCommonParams(*m_sliceStateParams);
2461
2462 // starting location for executing slice level cmds
2463 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2464
2465 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2466 for (uint32_t startLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
2467 {
2468 if (IsFirstPass())
2469 {
2470 slcData[slcCount].CmdOffset = startLcu * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
2471 }
2472
2473 SetHcpSliceStateParams(*m_sliceStateParams, slcData, slcCount);
2474
2475 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, m_sliceStateParams));
2476
2477 startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice;
2478
2479 m_batchBufferForPakSlicesStartOffset = (uint32_t)m_batchBufferForPakSlices[m_currPakSliceIdx].iCurrent;
2480
2481 if (m_hevcVdencAcqpEnabled || m_brcEnabled)
2482 {
2483 // save offset for next 2nd level batch buffer usage
2484 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
2485 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
2486 // m_vdencBatchBufferPerSliceVarSize: variable size for each slice
2487 m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount];
2488 }
2489
2490 // Send VD_PIPELINE_FLUSH command
2491 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2492 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2493 vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
2494 vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2495 vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2496 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2497 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2498 }
2499
2500 if (m_useBatchBufferForPakSlices)
2501 {
2502 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
2503 m_osInterface,
2504 &m_batchBufferForPakSlices[m_currPakSliceIdx],
2505 m_lastTaskInPhase));
2506 }
2507
2508 // Insert end of sequence/stream if set
2509 if (m_lastPicInStream || m_lastPicInSeq)
2510 {
2511 MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2512 MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2513 pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2514 pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2515 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2516 }
2517
2518 // Send MI_FLUSH command
2519 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2520 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2521 flushDwParams.bVideoPipelineCacheInvalidate = true;
2522 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2523
2524 // Send VD_PIPELINE_FLUSH command
2525 MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2526 MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2527 vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2528 vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2529 vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2530 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2531
2532 // Send MI_FLUSH command
2533 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2534 flushDwParams.bVideoPipelineCacheInvalidate = true;
2535 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2536
2537 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2538
2539 // BRC PAK statistics different for each pass
2540 if (m_brcEnabled)
2541 {
2542 uint32_t offset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2543 m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset
2544 sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
2545
2546 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
2547 readBrcPakStatsParams.pHwInterface = m_hwInterface;
2548 readBrcPakStatsParams.presBrcPakStatisticBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2549 readBrcPakStatsParams.presStatusBuffer = &m_encodeStatusBuf.resStatusBuffer;
2550 readBrcPakStatsParams.dwStatusBufNumPassesOffset = offset;
2551 readBrcPakStatsParams.ucPass = (uint8_t) GetCurrentPass();
2552 readBrcPakStatsParams.VideoContext = m_videoContext;
2553
2554 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatistics(
2555 &cmdBuffer,
2556 &readBrcPakStatsParams));
2557 }
2558
2559 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2560 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
2561 CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(&cmdBuffer));
2562
2563 if (m_lookaheadPass)
2564 {
2565 CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreLookaheadStatistics(&cmdBuffer));
2566 }
2567 #if USE_CODECHAL_DEBUG_TOOL
2568 if (m_brcEnabled && m_enableFakeHrdSize)
2569 {
2570 uint32_t sizeInByte = (m_pictureCodingType == I_TYPE) ? m_fakeIFrameHrdSize : m_fakePBFrameHrdSize;
2571 CODECHAL_ENCODE_CHK_STATUS_RETURN(ModifyEncodedFrameSizeWithFakeHeaderSize(
2572 &cmdBuffer,
2573 sizeInByte,
2574 m_resVdencBrcUpdateDmemBufferPtr[0],
2575 0,
2576 &m_resFrameStatStreamOutBuffer,
2577 sizeof(uint32_t) * 4));
2578 }
2579 #endif
2580
2581 if (!m_lookaheadPass || m_swLaMode)
2582 {
2583 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2584 }
2585
2586 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2587 {
2588 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2589 }
2590
2591 std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass())+"]";
2592 CODECHAL_DEBUG_TOOL(
2593 CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2594 &cmdBuffer,
2595 CODECHAL_NUM_MEDIA_STATES,
2596 pakPassName.data()));)
2597
2598 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2599
2600 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2601 {
2602 bool renderingFlags = m_videoContextUsesNullHw;
2603
2604 CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
2605
2606 CODECHAL_DEBUG_TOOL(
2607 if (m_mmcState)
2608 {
2609 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2610 }
2611 )
2612
2613 if (IsLastPass() &&
2614 m_signalEnc &&
2615 m_currRefSync &&
2616 !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2617 {
2618 // signal semaphore
2619 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2620 syncParams.GpuContext = m_videoContext;
2621 syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2622
2623 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2624 m_currRefSync->uiSemaphoreObjCount++;
2625 m_currRefSync->bInUsed = true;
2626 }
2627 }
2628
2629 // HuC FW outputs are ready at this point if single task phase is enabled
2630 if (m_vdencHucUsed && m_singleTaskPhaseSupported)
2631 {
2632 // HuC Output STF=1
2633 CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
2634 }
2635
2636 // Reset parameters for next PAK execution
2637 if (IsLastPass())
2638 {
2639 if (!m_singleTaskPhaseSupported)
2640 {
2641 m_osInterface->pfnResetPerfBufferID(m_osInterface);
2642 }
2643
2644 m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2645
2646 if (m_hevcSeqParams->ParallelBRC)
2647 {
2648 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
2649 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2650 }
2651
2652 m_newPpsHeader = 0;
2653 m_newSeqHeader = 0;
2654 m_frameNum++;
2655 }
2656
2657 return eStatus;
2658 }
2659
ReadHcpStatus(PMOS_COMMAND_BUFFER cmdBuffer)2660 MOS_STATUS CodechalVdencHevcState::ReadHcpStatus(PMOS_COMMAND_BUFFER cmdBuffer)
2661 {
2662 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2663
2664 CODECHAL_ENCODE_FUNCTION_ENTER;
2665
2666 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2667
2668 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::ReadHcpStatus(cmdBuffer));
2669
2670 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2671 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2672 // Slice Size Conformance
2673 if (m_hevcSeqParams->SliceSizeControl)
2674 {
2675 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2676 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2677 miStoreRegMemParams.presStoreBuffer = m_resSliceCountBuffer;
2678 miStoreRegMemParams.dwOffset = 0;
2679 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncSliceCountRegOffset;
2680 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2681
2682 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2683 miStoreRegMemParams.presStoreBuffer = m_resVdencModeTimerBuffer;
2684 miStoreRegMemParams.dwOffset = 0;
2685 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncVdencModeTimerRegOffset;
2686 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2687 }
2688
2689 if (m_vdencHucUsed)
2690 {
2691 // Store PAK frameSize MMIO to PakInfo buffer
2692 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2693 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2694 miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[0];
2695 miStoreRegMemParams.dwOffset = 0;
2696 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2697 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2698 }
2699
2700 CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(cmdBuffer))
2701
2702 return eStatus;
2703 }
2704
SetSequenceStructs()2705 MOS_STATUS CodechalVdencHevcState::SetSequenceStructs()
2706 {
2707 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2708
2709 CODECHAL_ENCODE_FUNCTION_ENTER;
2710
2711 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::SetSequenceStructs());
2712
2713 switch (m_hevcSeqParams->TargetUsage)
2714 {
2715 case 1: case 2: // Quality mode
2716 m_hevcSeqParams->TargetUsage = 1;
2717 break;
2718 case 3: case 4: case 5: // Normal mode
2719 m_hevcSeqParams->TargetUsage = 4;
2720 break;
2721 case 6: case 7: // Speed mode
2722 m_hevcSeqParams->TargetUsage = 7;
2723 break;
2724 default:
2725 m_hevcSeqParams->TargetUsage = 4;
2726 break;
2727 }
2728
2729 m_targetUsage = (uint32_t)m_hevcSeqParams->TargetUsage;
2730
2731 // enable motion adaptive under game streamming scenario for better quality
2732 if (m_hevcSeqParams->ScenarioInfo == ESCENARIO_GAMESTREAMING)
2733 {
2734 m_enableMotionAdaptive = true;
2735 }
2736
2737 // ACQP is by default disabled, enable it when SSC/QpAdjust required.
2738 if (m_hevcSeqParams->SliceSizeControl == true ||
2739 m_hevcSeqParams->QpAdjustment == true)
2740 {
2741 m_hevcVdencAcqpEnabled = true;
2742 }
2743
2744 // Get row store cache offset as all the needed information is got here
2745 if (m_vdencInterface->IsRowStoreCachingSupported())
2746 {
2747 MHW_VDBOX_ROWSTORE_PARAMS rowStoreParams;
2748 rowStoreParams.Mode = m_mode;
2749 rowStoreParams.dwPicWidth = m_frameWidth;
2750 rowStoreParams.ucChromaFormat = m_chromaFormat;
2751 rowStoreParams.ucBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
2752 rowStoreParams.ucLCUSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2753 // VDEnc only support LCU64 for now
2754 if (rowStoreParams.ucLCUSize != MAX_LCU_SIZE)
2755 {
2756 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC VDEnc only support LCU64 for now.");
2757 return MOS_STATUS_INVALID_PARAMETER;
2758 }
2759 m_hwInterface->SetRowstoreCachingOffsets(&rowStoreParams);
2760 }
2761
2762
2763 if (m_hevcSeqParams->VBVBufferSizeInBit < m_hevcSeqParams->InitVBVBufferFullnessInBit)
2764 {
2765 CODECHAL_ENCODE_NORMALMESSAGE(
2766 "VBVBufferSizeInBit is less than InitVBVBufferFullnessInBit, \
2767 min(VBVBufferSizeInBit, InitVBVBufferFullnessInBit) will set to \
2768 hucVdencBrcInitDmem->InitBufFull_U32 and hucVdencBrcUpdateDmem->TARGETSIZE_U32(except Low Delay BRC).\n");
2769 }
2770
2771 m_lookaheadDepth = m_hevcSeqParams->LookaheadDepth;
2772 m_lookaheadPass = (m_lookaheadDepth > 0) && (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP);
2773
2774 if (m_lookaheadPass)
2775 {
2776 if (m_hevcSeqParams->MaxAdaptiveGopPicSize < m_hevcSeqParams->MinAdaptiveGopPicSize)
2777 {
2778 m_hevcSeqParams->MaxAdaptiveGopPicSize = m_hevcSeqParams->MinAdaptiveGopPicSize;
2779 }
2780 else if ((m_hevcSeqParams->MaxAdaptiveGopPicSize > 0) && (m_hevcSeqParams->MinAdaptiveGopPicSize == 0))
2781 {
2782 m_hevcSeqParams->MinAdaptiveGopPicSize = (m_hevcSeqParams->MaxAdaptiveGopPicSize + 1) >> 1;
2783 }
2784
2785 m_lookaheadAdaptiveI = (m_hevcSeqParams->MaxAdaptiveGopPicSize != m_hevcSeqParams->MinAdaptiveGopPicSize);
2786 if (!m_lookaheadAdaptiveI && (m_hevcSeqParams->MaxAdaptiveGopPicSize == 0))
2787 {
2788 if (m_hevcSeqParams->GopPicSize > 0)
2789 {
2790 m_hevcSeqParams->MaxAdaptiveGopPicSize = m_hevcSeqParams->GopPicSize;
2791 m_hevcSeqParams->MinAdaptiveGopPicSize = m_hevcSeqParams->GopPicSize;
2792 }
2793 else
2794 {
2795 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid GopPicSize in LPLA!");
2796 return MOS_STATUS_INVALID_PARAMETER;
2797 }
2798 }
2799 }
2800
2801 if (m_lookaheadDepth > 0)
2802 {
2803 uint64_t targetBitRate = (uint64_t)m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
2804 double frameRate = (m_hevcSeqParams->FrameRate.Denominator ? (double)m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator : 30);
2805 if ((frameRate < 1) || (targetBitRate < frameRate) || (targetBitRate > 0xFFFFFFFF))
2806 {
2807 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid FrameRate or TargetBitRate in LPLA!");
2808 return MOS_STATUS_INVALID_PARAMETER;
2809 }
2810
2811 m_averageFrameSize = (uint32_t)(targetBitRate / frameRate);
2812
2813 if (m_hevcSeqParams->VBVBufferSizeInBit < m_hevcSeqParams->InitVBVBufferFullnessInBit)
2814 {
2815 CODECHAL_ENCODE_ASSERTMESSAGE("VBVBufferSizeInBit is less than InitVBVBufferFullnessInBit\n");
2816 eStatus = MOS_STATUS_INVALID_PARAMETER;
2817 return eStatus;
2818 }
2819
2820 if (m_targetBufferFulness == 0 && m_prevTargetFrameSize == 0)
2821 {
2822 m_targetBufferFulness = m_hevcSeqParams->VBVBufferSizeInBit - m_hevcSeqParams->InitVBVBufferFullnessInBit;
2823 if (m_lookaheadPass)
2824 {
2825 uint32_t initVbvFullnessInFrames = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit) / m_averageFrameSize;
2826 uint32_t vbvBufferSizeInFrames = m_hevcSeqParams->VBVBufferSizeInBit / m_averageFrameSize;
2827 uint32_t encBufferFullness = (vbvBufferSizeInFrames - initVbvFullnessInFrames) * m_averageFrameSize;
2828 m_bufferFulnessError = (int32_t)((int64_t)m_targetBufferFulness - (int64_t)encBufferFullness);
2829 }
2830 }
2831 }
2832
2833 return eStatus;
2834 }
2835
SetPictureStructs()2836 MOS_STATUS CodechalVdencHevcState::SetPictureStructs()
2837 {
2838 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2839
2840 CODECHAL_ENCODE_FUNCTION_ENTER;
2841
2842 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::SetPictureStructs());
2843
2844 m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
2845
2846 //Enable only for TU1
2847 if (m_hevcSeqParams->TargetUsage != 1)
2848 {
2849 m_hmeEnabled = m_b16XMeEnabled = m_b32XMeEnabled = false;
2850 m_16xMeSupported = false;
2851 }
2852
2853 // SSC can be satisfied in single VDEnc+PAK pass when required.
2854 // However it is not 100% guaranteed due to delay in HW.
2855 // When it happens, PAK would indicate SSC violation in MMIO register
2856 // and HuC would adjust SSC threshold and triggers another VDEnc+PAK pass.
2857 // SSC requires HuC for all target usages. (allow 1 pass SSC temporarily for testing purpose)
2858 if (m_hevcSeqParams->SliceSizeControl)
2859 {
2860 m_vdencHuCConditional2ndPass = true;
2861 }
2862
2863 // Weighted Prediction is supported only with VDEnc, only applicable to P/B frames
2864 if (m_hevcPicParams->weighted_pred_flag || m_hevcPicParams->weighted_bipred_flag)
2865 {
2866 // with SAO, needs to increase total number of passes to 3 later (2 for SAO, 1 for WP)
2867 m_hevcVdencWeightedPredEnabled = true;
2868 m_vdencHuCConditional2ndPass = true;
2869
2870 // Set ACQP enabled if GPU base WP is required.
2871 if(m_hevcPicParams->bEnableGPUWeightedPrediction)
2872 {
2873 m_hevcVdencAcqpEnabled = true;
2874 }
2875 }
2876
2877 if (m_lookaheadPass)
2878 {
2879 m_vdencHuCConditional2ndPass = m_lookaheadAdaptiveI && m_hevcPicParams->CodingType != I_TYPE; //conditional 2nd pass for adaptive IDR
2880 }
2881
2882 if (m_brcEnabled) // VDEnc BRC supports maximum 2 PAK passes
2883 {
2884 if (m_hevcPicParams->BRCPrecision == 1) // single-pass BRC, App requirment with first priority
2885 {
2886 m_numPasses = 0;
2887 // There is no need of additional pass for SSC, violation rate could be high but ok
2888 }
2889 else if (m_multipassBrcSupported) // multi-pass BRC is supported
2890 {
2891 m_numPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES - 1;
2892 m_vdencHuCConditional2ndPass = true;
2893 }
2894 else
2895 {
2896 m_numPasses = 0;
2897 }
2898
2899 m_vdencBrcEnabled = true;
2900 m_hevcVdencAcqpEnabled = false; // when BRC is enabled, ACQP has to be turned off
2901 }
2902 else // CQP, ACQP
2903 {
2904 m_numPasses = 0;
2905
2906 // ACQP + SSC, ACQP + WP. CQP + SSC/WP donot need 2nd pass
2907 // driver programs 2nd pass, but it will be decided by conditional batch buffer end cmd to execute 2nd pass
2908 if (m_vdencHuCConditional2ndPass && (m_hevcVdencAcqpEnabled || m_lookaheadPass))
2909 {
2910 m_numPasses += 1;
2911 }
2912 }
2913
2914 CODECHAL_ENCODE_VERBOSEMESSAGE("m_numPasses = %d",m_numPasses);
2915
2916 m_vdencHucUsed = m_hevcVdencAcqpEnabled || m_vdencBrcEnabled;
2917
2918 // VDEnc always needs to enable due to pak fractional QP features
2919 // In VDENC mode, this field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
2920 CODECHAL_ENCODE_ASSERT(m_hevcPicParams->cu_qp_delta_enabled_flag == 1);
2921
2922 // Restriction: If RollingI is enabled, ROI needs to be disabled
2923 if (m_hevcPicParams->bEnableRollingIntraRefresh)
2924 {
2925 m_hevcPicParams->NumROI = 0;
2926 }
2927
2928 //VDEnc StreamIn enabled if case of ROI (All frames), MBQP(LCUQP), DirtyRect and SHME (ldB frames)
2929 m_vdencStreamInEnabled = (m_vdencEnabled) && (m_hevcPicParams->NumROI ||
2930 m_encodeParams.bMbQpDataEnabled ||
2931 (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)) || (m_b16XMeEnabled));
2932
2933 CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareVDEncStreamInData());
2934
2935 if (!m_lookaheadPass)
2936 {
2937 if ((m_lookaheadDepth > 0) && (m_prevTargetFrameSize > 0))
2938 {
2939 int64_t targetBufferFulness = (int64_t)m_targetBufferFulness;
2940 targetBufferFulness += (((int64_t)m_prevTargetFrameSize) << 3) - (int64_t)m_averageFrameSize;
2941 m_targetBufferFulness = targetBufferFulness < 0 ? 0 : (targetBufferFulness > 0xFFFFFFFF ? 0xFFFFFFFF : (uint32_t)targetBufferFulness);
2942 }
2943
2944 m_prevTargetFrameSize = m_hevcPicParams->TargetFrameSize;
2945 }
2946
2947 return eStatus;
2948 }
2949
SetupRegionBoosting(PMOS_RESOURCE vdencStreamIn,uint16_t boostIndex)2950 MOS_STATUS CodechalVdencHevcState::SetupRegionBoosting(PMOS_RESOURCE vdencStreamIn, uint16_t boostIndex)
2951 {
2952 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2953
2954 CODECHAL_ENCODE_FUNCTION_ENTER;
2955 CODECHAL_ENCODE_CHK_NULL_RETURN(vdencStreamIn);
2956
2957 MOS_LOCK_PARAMS lockFlags;
2958 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2959 lockFlags.WriteOnly = 1;
2960
2961 uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
2962 m_osInterface,
2963 vdencStreamIn,
2964 &lockFlags);
2965 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2966
2967 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
2968 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
2969 int32_t streamInNumCUs = streamInWidth * streamInHeight;
2970
2971 MOS_ZeroMemory(data, streamInNumCUs * 64);
2972
2973 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
2974
2975 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
2976 streaminDataParams.setQpRoiCtrl = true;
2977 uint32_t roiCtrl = 85; // All four 16x16 blocks within the 32x32 blocks share the same region ID 1 (01010101).
2978 for (uint16_t y = 0; y < streamInHeight; y++)
2979 {
2980 if ((y & 7) == boostIndex)
2981 {
2982 for (uint16_t x = 0; x < streamInWidth; x++)
2983 {
2984 streaminDataParams.roiCtrl = 85;
2985 SetStreaminDataPerRegion(streamInWidth, y, y+1, x, x+1, &streaminDataParams, data);
2986 }
2987 }
2988 }
2989
2990 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
2991 streaminDataParams.maxTuSize = 3; //Maximum TU Size allowed, restriction to be set to 3
2992 streaminDataParams.maxCuSize = 2; //For ARB, currently support 32x32 block
2993 switch (m_hevcSeqParams->TargetUsage)
2994 {
2995 case 1:
2996 case 4:
2997 streaminDataParams.numMergeCandidateCu64x64 = 4;
2998 streaminDataParams.numMergeCandidateCu32x32 = 3;
2999 streaminDataParams.numMergeCandidateCu16x16 = 2;
3000 streaminDataParams.numMergeCandidateCu8x8 = 1;
3001 streaminDataParams.numImePredictors = m_imgStateImePredictors;
3002 break;
3003 case 7:
3004 streaminDataParams.numMergeCandidateCu64x64 = 2;
3005 streaminDataParams.numMergeCandidateCu32x32 = 2;
3006 streaminDataParams.numMergeCandidateCu16x16 = 2;
3007 streaminDataParams.numMergeCandidateCu8x8 = 0;
3008 streaminDataParams.numImePredictors = 4;
3009 break;
3010 }
3011
3012 for (auto i = 0; i < streamInNumCUs; i++)
3013 {
3014 SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
3015 }
3016
3017 m_osInterface->pfnUnlockResource(
3018 m_osInterface,
3019 vdencStreamIn);
3020
3021 return eStatus;
3022 }
3023
PrepareVDEncStreamInData()3024 MOS_STATUS CodechalVdencHevcState::PrepareVDEncStreamInData()
3025 {
3026 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3027
3028 CODECHAL_ENCODE_FUNCTION_ENTER;
3029
3030 if (m_vdencStreamInEnabled && m_encodeParams.bMbQpDataEnabled)
3031 {
3032 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3033 {
3034 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupMbQpStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
3035 }
3036 else if (m_vdencHucUsed)
3037 {
3038 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBRCROIStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx], &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]));
3039 }
3040 }
3041
3042
3043 if (m_brcAdaptiveRegionBoostSupported && m_hevcPicParams->TargetFrameSize && !m_lookaheadDepth)
3044 {
3045 // Adaptive region boost is enabled for TCBRC only
3046 m_brcAdaptiveRegionBoostEnable = true;
3047 m_vdencStreamInEnabled = true;
3048 }
3049 else
3050 {
3051 m_brcAdaptiveRegionBoostEnable = false;
3052 }
3053
3054 if (!m_brcAdaptiveRegionBoostEnable && m_vdencStreamInEnabled && m_hevcPicParams->NumROI)
3055 {
3056 ProcessRoiDeltaQp();
3057
3058 if (m_vdencHucUsed && !m_vdencNativeROIEnabled)
3059 {
3060 //ForceQp ROI in ACQP, BRC mode only
3061 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBRCROIStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx], &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]));
3062 }
3063 else
3064 {
3065 //Native ROI
3066 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROIStreamIn(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx])));
3067 }
3068 }
3069 else if (!m_brcAdaptiveRegionBoostEnable && m_vdencStreamInEnabled && (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
3070 {
3071 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupDirtyRectStreamIn(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx])));
3072 }
3073
3074 if (m_brcAdaptiveRegionBoostEnable)
3075 {
3076 uint16_t rowOffset[8] = {0, 3, 5, 2, 7, 4, 1, 6};
3077 uint16_t circularFrameIdx = (m_storeData - 1) & 7;
3078 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupRegionBoosting(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx]), rowOffset[circularFrameIdx]));
3079 }
3080
3081 return eStatus;
3082 }
3083
CalcScaledDimensions()3084 MOS_STATUS CodechalVdencHevcState::CalcScaledDimensions()
3085 {
3086 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3087
3088 CODECHAL_ENCODE_FUNCTION_ENTER;
3089
3090 // HME Scaling WxH
3091 m_downscaledWidthInMb4x =
3092 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x);
3093 m_downscaledHeightInMb4x =
3094 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x);
3095 m_downscaledWidth4x =
3096 m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH;
3097 m_downscaledHeight4x =
3098 m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT;
3099
3100 // SuperHME Scaling WxH
3101 m_downscaledWidthInMb16x =
3102 CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x);
3103 m_downscaledHeightInMb16x =
3104 CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x);
3105 m_downscaledWidth16x =
3106 m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH;
3107 m_downscaledHeight16x =
3108 m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT;
3109
3110 return eStatus;
3111 }
3112
ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)3113 MOS_STATUS CodechalVdencHevcState::ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)
3114 {
3115 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3116
3117 CODECHAL_ENCODE_CHK_NULL_RETURN(slcParams);
3118
3119 uint8_t maxNumRef0 = m_numMaxVdencL0Ref;
3120 uint8_t maxNumRef1 = m_numMaxVdencL1Ref;
3121
3122 if (slcParams->num_ref_idx_l0_active_minus1 > maxNumRef0 - 1)
3123 {
3124 CODECHAL_ENCODE_ASSERT(false);
3125 slcParams->num_ref_idx_l0_active_minus1 = maxNumRef0 - 1;
3126 }
3127
3128 if (slcParams->num_ref_idx_l1_active_minus1 > maxNumRef1 - 1)
3129 {
3130 CODECHAL_ENCODE_ASSERT(false);
3131 slcParams->num_ref_idx_l1_active_minus1 = maxNumRef1 - 1;
3132 }
3133
3134 // For HEVC VDEnc, L0 and L1 must contain the same (number of) elements. If not, the input slc param is not good for VDEnc.
3135 if (slcParams->num_ref_idx_l0_active_minus1 != slcParams->num_ref_idx_l1_active_minus1)
3136 {
3137 CODECHAL_ENCODE_ASSERT(false);
3138 slcParams->num_ref_idx_l1_active_minus1 = slcParams->num_ref_idx_l0_active_minus1;
3139 }
3140
3141 for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
3142 {
3143 if (slcParams->RefPicList[0][j].PicEntry != slcParams->RefPicList[1][j].PicEntry)
3144 {
3145 CODECHAL_ENCODE_ASSERT(false);
3146 eStatus = MOS_STATUS_INVALID_PARAMETER;
3147 return eStatus;
3148 }
3149 }
3150
3151 return eStatus;
3152 }
3153
InitializePicture(const EncoderParams & params)3154 MOS_STATUS CodechalVdencHevcState::InitializePicture(const EncoderParams& params)
3155 {
3156 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3157
3158 CODECHAL_ENCODE_FUNCTION_ENTER;
3159
3160 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::InitializePicture(params));
3161
3162 m_resVdencStatsBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);
3163 m_resPakStatsBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakStats);
3164 m_resSliceCountBuffer = &m_sliceCountBuffer;
3165 m_resVdencModeTimerBuffer = &m_vdencModeTimerBuffer;
3166
3167 return eStatus;
3168 }
3169
UserFeatureKeyReport()3170 MOS_STATUS CodechalVdencHevcState::UserFeatureKeyReport()
3171 {
3172 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3173
3174 CODECHAL_ENCODE_FUNCTION_ENTER;
3175
3176 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::UserFeatureKeyReport());
3177
3178 #if (_DEBUG || _RELEASE_INTERNAL)
3179 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_VDENC_IN_USE_ID, m_vdencEnabled, m_osInterface->pOsContext);
3180 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID, m_hevcVdencAcqpEnabled, m_osInterface->pOsContext);
3181 #endif
3182
3183 return eStatus;
3184 }
3185
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)3186 MOS_STATUS CodechalVdencHevcState::GetStatusReport(
3187 EncodeStatus *encodeStatus,
3188 EncodeStatusReport *encodeStatusReport)
3189 {
3190 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3191
3192 CODECHAL_ENCODE_FUNCTION_ENTER;
3193
3194 // common initilization
3195 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport));
3196
3197 MOS_LOCK_PARAMS lockFlags;
3198 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3199 lockFlags.ReadOnly = 1;
3200
3201 uint32_t* sliceSize = nullptr;
3202 // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
3203 if (encodeStatus->sliceReport.pSliceSize)
3204 {
3205 sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
3206 CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
3207
3208 encodeStatusReport->NumberSlices = encodeStatus->sliceReport.NumberSlices;
3209 encodeStatusReport->SizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatus->sliceReport.NumberSlices;
3210 encodeStatusReport->SliceSizeOverflow = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
3211 encodeStatusReport->pSliceSizes = (uint16_t*)sliceSize;
3212
3213 uint16_t prevCumulativeSliceSize = 0;
3214 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
3215 for (auto sliceCount = 0; sliceCount < encodeStatus->sliceReport.NumberSlices; sliceCount++)
3216 {
3217 // PAK output the sliceSize at 16DW intervals.
3218 CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
3219 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
3220
3221 //convert cummulative slice size to individual, first slice may have PPS/SPS,
3222 encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
3223 prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
3224 }
3225 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
3226 }
3227
3228 if (m_lookaheadPass && m_lookaheadReport && (encodeStatus->lookaheadStatus.targetFrameSize > 0))
3229 {
3230 encodeStatusReport->pLookaheadStatus = &encodeStatus->lookaheadStatus;
3231 encodeStatus->lookaheadStatus.isValid = 1;
3232 uint64_t targetFrameSize = (uint64_t)encodeStatus->lookaheadStatus.targetFrameSize * m_averageFrameSize;
3233 encodeStatus->lookaheadStatus.targetFrameSize = (uint32_t)((targetFrameSize + (32*8)) / (64*8)); // Convert bits to bytes. 64 is normalized average frame size used in lookahead analysis kernel
3234 uint64_t targetBufferFulness = (uint64_t)encodeStatus->lookaheadStatus.targetBufferFulness * m_averageFrameSize;
3235 encodeStatus->lookaheadStatus.targetBufferFulness = (uint32_t)((targetBufferFulness + 32) / 64); // 64 is normalized average frame size used in lookahead analysis kernel
3236 // Apply rounding error to targetFrameSize to align target buffer fullness between lookahead pass and encode pass
3237 if (m_prevTargetFrameSize > 0)
3238 {
3239 int64_t encTargetBufferFulness = (int64_t)m_targetBufferFulness;
3240 encTargetBufferFulness += (((int64_t)m_prevTargetFrameSize) << 3) - (int64_t)m_averageFrameSize;
3241 m_targetBufferFulness = encTargetBufferFulness < 0 ?
3242 0 : (encTargetBufferFulness > 0xFFFFFFFF ? 0xFFFFFFFF : (uint32_t)encTargetBufferFulness);
3243 int32_t deltaBits = (int32_t)((int64_t)(encodeStatus->lookaheadStatus.targetBufferFulness) + m_bufferFulnessError - (int64_t)(m_targetBufferFulness));
3244 deltaBits /= 64;
3245 if (deltaBits > 8)
3246 {
3247 if ((uint32_t)deltaBits > encodeStatus->lookaheadStatus.targetFrameSize)
3248 {
3249 deltaBits = (int32_t)(encodeStatus->lookaheadStatus.targetFrameSize);
3250 }
3251 encodeStatus->lookaheadStatus.targetFrameSize += (uint32_t)(deltaBits >> 3);
3252 }
3253 else if (deltaBits < -8)
3254 {
3255 if ((-deltaBits) > (int32_t)(encodeStatus->lookaheadStatus.targetFrameSize))
3256 {
3257 deltaBits = -(int32_t)(encodeStatus->lookaheadStatus.targetFrameSize);
3258 }
3259 encodeStatus->lookaheadStatus.targetFrameSize -= (uint32_t)((-deltaBits) >> 3);
3260 }
3261 }
3262 m_prevTargetFrameSize = encodeStatus->lookaheadStatus.targetFrameSize;
3263
3264 if (encodeStatus->lookaheadStatus.cqmHint > 4)
3265 {
3266 // Currently only 0x00 and 0x01 are valid. Report invalid (0xFF) for other values.
3267 encodeStatus->lookaheadStatus.cqmHint = 0xFF;
3268 }
3269
3270 // ensure CQM hint never exceed the number of PPS
3271 if (encodeStatus->lookaheadStatus.cqmHint <= 2)
3272 {
3273 encodeStatus->lookaheadStatus.cqmHint = 0;
3274 }
3275 else if (encodeStatus->lookaheadStatus.cqmHint <= 4)
3276 {
3277 encodeStatus->lookaheadStatus.cqmHint = 1;
3278 }
3279
3280 if (encodeStatus->lookaheadStatus.pyramidDeltaQP == 0)
3281 {
3282 encodeStatus->lookaheadStatus.miniGopSize = 1;
3283 }
3284 else if (m_hevcSeqParams->GopRefDist == 1) // LPLA only supports P pyramid for this condition
3285 {
3286 encodeStatus->lookaheadStatus.miniGopSize = 4;
3287 }
3288 else
3289 {
3290 encodeStatus->lookaheadStatus.miniGopSize = m_hevcSeqParams->GopRefDist;
3291 }
3292 }
3293 else
3294 {
3295 encodeStatusReport->pLookaheadStatus = nullptr;
3296 encodeStatus->lookaheadStatus.isValid = 0;
3297 encodeStatus->lookaheadStatus.cqmHint = 0xFF;
3298 encodeStatus->lookaheadStatus.targetFrameSize = 0;
3299 encodeStatus->lookaheadStatus.targetBufferFulness = 0;
3300 }
3301
3302 return eStatus;
3303 }
3304
AllocatePakResources()3305 MOS_STATUS CodechalVdencHevcState::AllocatePakResources()
3306 {
3307 CODECHAL_ENCODE_FUNCTION_ENTER;
3308
3309 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3310
3311 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::AllocatePakResources());
3312
3313 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3314 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3315 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3316 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3317 allocParamsForBufferLinear.Format = Format_Buffer;
3318
3319 // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
3320 uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE); //Each tile has 8 cache size bytes of data, Align to page is HuC requirement
3321 allocParamsForBufferLinear.dwBytes = size;
3322 allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
3323
3324 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3325 m_osInterface,
3326 &allocParamsForBufferLinear,
3327 &m_resFrameStatStreamOutBuffer),
3328 "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
3329
3330 // PAK Statistics buffer
3331 size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
3332 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3333 m_standard, size, 1, pakStats, "pakStats"));
3334
3335 // Slice Count buffer 1 DW = 4 Bytes
3336 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3337 allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
3338
3339 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3340 m_osInterface,
3341 &allocParamsForBufferLinear,
3342 &m_sliceCountBuffer),
3343 "Failed to create VDENC Slice Count Buffer");
3344
3345 // VDEncMode Timer buffer 1 DW = 4 Bytes
3346 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3347 allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
3348
3349 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3350 m_osInterface,
3351 &allocParamsForBufferLinear,
3352 &m_vdencModeTimerBuffer),
3353 "Failed to create VDEncMode Timer Buffer");
3354
3355 return eStatus;
3356 }
3357
FreePakResources()3358 MOS_STATUS CodechalVdencHevcState::FreePakResources()
3359 {
3360 CODECHAL_ENCODE_FUNCTION_ENTER;
3361
3362 m_osInterface->pfnFreeResource(m_osInterface, &m_resFrameStatStreamOutBuffer);
3363 m_osInterface->pfnFreeResource(m_osInterface, &m_sliceCountBuffer);
3364 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencModeTimerBuffer);
3365
3366 for (uint32_t i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++)
3367 {
3368 if (!Mos_ResourceIsNull(&m_resSliceReport[i]))
3369 {
3370 m_osInterface->pfnFreeResource(m_osInterface, &m_resSliceReport[i]);
3371 }
3372 }
3373
3374 if (m_swLaMode != nullptr)
3375 {
3376 m_osInterface->pfnFreeLibrary(m_swLaMode);
3377 m_swLaMode = nullptr;
3378 }
3379
3380 return CodechalEncodeHevcBase::FreePakResources();
3381 }
3382
AllocateEncResources()3383 MOS_STATUS CodechalVdencHevcState::AllocateEncResources()
3384 {
3385 CODECHAL_ENCODE_FUNCTION_ENTER;
3386
3387 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3388
3389 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3390 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3391 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3392 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3393 allocParamsForBufferLinear.Format = Format_Buffer;
3394
3395 // PAK stream-out buffer
3396 allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_PAK_STREAMOUT_SIZE;
3397 allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer";
3398 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3399 m_osInterface,
3400 &allocParamsForBufferLinear,
3401 &m_resStreamOutBuffer[0]),
3402 "Failed to allocate Pak Stream Out Buffer.");
3403
3404 // VDENC Intra Row Store Scratch buffer
3405 // 1 cacheline per MB
3406 uint32_t size = m_picWidthInMb * CODECHAL_CACHELINE_SIZE;
3407 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3408 m_standard, size, 1, vdencIntraRowStoreScratch, "vdencIntraRowStoreScratch"));
3409
3410 // VDENC Statistics buffer, only needed for BRC
3411 // The size is 19 CL for each tile, allocated with worst case, optimize later
3412 size = MOS_ALIGN_CEIL(m_vdencBrcStatsBufferSize * m_maxTileNumber, CODECHAL_PAGE_SIZE);
3413 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3414 m_standard, size, 1, vdencStats, "vdencStats"));
3415
3416 if (m_hucCmdInitializer)
3417 {
3418 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerAllocateResources(m_hwInterface));
3419 }
3420
3421 return eStatus;
3422 }
3423
FreeEncResources()3424 MOS_STATUS CodechalVdencHevcState::FreeEncResources()
3425 {
3426 CODECHAL_ENCODE_FUNCTION_ENTER;
3427
3428 // PAK stream-out buffer de-allocated inside CodecHalEncodeReleaseResources()
3429
3430 if (m_hucCmdInitializer)
3431 {
3432 m_hucCmdInitializer->CmdInitializerFreeResources();
3433 }
3434 MOS_Delete(m_hucCmdInitializer);
3435
3436 return MOS_STATUS_SUCCESS;
3437 }
3438
AllocateBrcResources()3439 MOS_STATUS CodechalVdencHevcState::AllocateBrcResources()
3440 {
3441 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3442
3443 CODECHAL_ENCODE_FUNCTION_ENTER;
3444
3445 // initiate allocation paramters and lock flags
3446 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3447 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3448 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3449 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3450 allocParamsForBufferLinear.Format = Format_Buffer;
3451
3452 allocParamsForBufferLinear.dwBytes = m_hevcBrcPakStatisticsSize;
3453 allocParamsForBufferLinear.pBufName = "BRC PAK Statistics Buffer";
3454
3455 MOS_LOCK_PARAMS lockFlagsWriteOnly;
3456 MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3457 lockFlagsWriteOnly.WriteOnly = true;
3458
3459 uint8_t *data = nullptr;
3460 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
3461 {
3462 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3463 m_osInterface,
3464 &allocParamsForBufferLinear,
3465 &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]),
3466 "Failed to allocate BRC PAK Statistics Buffer.");
3467
3468 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3469 m_osInterface,
3470 &(m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]),
3471 &lockFlagsWriteOnly));
3472
3473 MOS_ZeroMemory(data, m_hevcBrcPakStatisticsSize);
3474 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]);
3475 }
3476
3477 // PAK Info buffer
3478 uint32_t size = MOS_ALIGN_CEIL(sizeof(CodechalVdencHevcPakInfo), CODECHAL_PAGE_SIZE);
3479 CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3480 m_standard, size, 1, pakInfo, "pakInfo"));
3481
3482 // HuC FW Region 6: Data Buffer of Current Picture
3483 // Data (1024 bytes) for current
3484 // Data (1024 bytes) for ref0
3485 // Data (1024 bytes) for ref1
3486 // Data (1024 bytes) for ref2
3487 allocParamsForBufferLinear.dwBytes = CODECHAL_PAGE_SIZE * 4;
3488 allocParamsForBufferLinear.pBufName = "Data from Pictures Buffer for Weighted Prediction";
3489
3490 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3491 m_osInterface,
3492 &allocParamsForBufferLinear,
3493 &m_dataFromPicsBuffer),
3494 "Failed to create Data from Pictures Buffer for Weighted Prediction");
3495
3496 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
3497 {
3498 // Delta QP for ROI Buffer
3499 // 1 byte for each 32x32 block, maximum region size is 8192 bytes for 4K/2K resolution, currently the allocation size is fixed
3500 allocParamsForBufferLinear.dwBytes = m_deltaQpRoiBufferSize;
3501 allocParamsForBufferLinear.pBufName = "Delta QP for ROI Buffer";
3502
3503 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3504 m_osInterface,
3505 &allocParamsForBufferLinear,
3506 &m_vdencDeltaQpBuffer[k]),
3507 "Failed to create Delta QP for ROI Buffer");
3508
3509 // BRC update DMEM
3510 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3511 allocParamsForBufferLinear.pBufName = "VDENC BrcUpdate DmemBuffer";
3512
3513 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES_FOR_TILE_REPLAY; i++)
3514 {
3515 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3516 m_osInterface,
3517 &allocParamsForBufferLinear,
3518 &m_vdencBrcUpdateDmemBuffer[k][i]),
3519 "Failed to create VDENC BrcUpdate DmemBuffer");
3520
3521 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3522 m_osInterface,
3523 &m_vdencBrcUpdateDmemBuffer[k][i],
3524 &lockFlagsWriteOnly));
3525
3526 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
3527 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[k][i]);
3528 }
3529
3530 // BRC init/reset DMEM
3531 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3532 allocParamsForBufferLinear.pBufName = "VDENC BrcInit DmemBuffer";
3533
3534 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3535 m_osInterface,
3536 &allocParamsForBufferLinear,
3537 &m_vdencBrcInitDmemBuffer[k]),
3538 "Failed to create VDENC BrcInit DmemBuffer");
3539
3540 CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3541 m_osInterface,
3542 &m_vdencBrcInitDmemBuffer[k],
3543 &lockFlagsWriteOnly));
3544
3545 MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
3546 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[k]);
3547
3548 // Const Data buffer
3549 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcConstDataBufferSize, CODECHAL_PAGE_SIZE);
3550 allocParamsForBufferLinear.pBufName = "VDENC BRC Const Data Buffer";
3551
3552 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3553 m_osInterface,
3554 &allocParamsForBufferLinear,
3555 &m_vdencBrcConstDataBuffer[k]),
3556 "Failed to create VDENC BRC Const Data Buffer");
3557
3558 // VDEnc read batch buffer (input for HuC FW)
3559 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
3560 allocParamsForBufferLinear.pBufName = "VDENC Read Batch Buffer";
3561
3562 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
3563 {
3564 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3565 m_osInterface,
3566 &allocParamsForBufferLinear,
3567 &m_vdencReadBatchBuffer[k][i]),
3568 "Failed to allocate VDENC Read Batch Buffer");
3569 }
3570
3571 // Lookahead Update DMEM
3572 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3573 allocParamsForBufferLinear.pBufName = "VDENC Lookahead update Dmem Buffer";
3574
3575 for (auto i = 0; i < CODECHAL_LPLA_NUM_OF_PASSES; i++)
3576 {
3577 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3578 m_osInterface,
3579 &allocParamsForBufferLinear,
3580 &m_vdencLaUpdateDmemBuffer[k][i]),
3581 "Failed to create VDENC Lookahead Update Dmem Buffer");
3582 }
3583 }
3584
3585 for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
3586 {
3587 // VDENC uses second level batch buffer
3588 MOS_ZeroMemory(&m_vdenc2ndLevelBatchBuffer[j], sizeof(MHW_BATCH_BUFFER));
3589 m_vdenc2ndLevelBatchBuffer[j].bSecondLevel = true;
3590 CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
3591 m_osInterface,
3592 &m_vdenc2ndLevelBatchBuffer[j],
3593 nullptr,
3594 m_hwInterface->m_vdenc2ndLevelBatchBufferSize));
3595 }
3596
3597 // BRC history buffer
3598 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcHistoryBufSize, CODECHAL_PAGE_SIZE);
3599 allocParamsForBufferLinear.pBufName = "VDENC BRC History Buffer";
3600
3601 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3602 m_osInterface,
3603 &allocParamsForBufferLinear,
3604 &m_vdencBrcHistoryBuffer),
3605 "Failed to create VDENC BRC History Buffer");
3606
3607 // Lookahead Init DMEM
3608 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3609 allocParamsForBufferLinear.pBufName = "VDENC Lookahead Init DmemBuffer";
3610
3611 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3612 m_osInterface,
3613 &allocParamsForBufferLinear,
3614 &m_vdencLaInitDmemBuffer),
3615 "Failed to create VDENC Lookahead Init DmemBuffer");
3616
3617 // Lookahead history buffer
3618 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_LaHistoryBufSize, CODECHAL_PAGE_SIZE);
3619 allocParamsForBufferLinear.pBufName = "VDENC Lookahead History Buffer";
3620
3621 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3622 m_osInterface,
3623 &allocParamsForBufferLinear,
3624 &m_vdencLaHistoryBuffer),
3625 "Failed to create VDENC Lookahead History Buffer");
3626
3627 // Debug buffer
3628 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcDebugBufSize, CODECHAL_PAGE_SIZE);
3629 allocParamsForBufferLinear.pBufName = "VDENC BRC Debug Buffer";
3630
3631 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3632 m_osInterface,
3633 &allocParamsForBufferLinear,
3634 &m_vdencBrcDbgBuffer),
3635 "Failed to create VDENC BRC Debug Buffer");
3636
3637 // Output ROI Streamin Buffer
3638 // 16 DWORDs (VDENC_HEVC_VP9_STREAMIN_STATE) for each 32x32 block, maximum region size is 65536 bytes for 8K/8K resolution, currently the allocation size is fixed
3639 allocParamsForBufferLinear.dwBytes = m_roiStreamInBufferSize;
3640 allocParamsForBufferLinear.pBufName = "Output ROI Streamin Buffer";
3641
3642 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
3643 m_osInterface,
3644 &allocParamsForBufferLinear,
3645 &m_vdencOutputROIStreaminBuffer));
3646
3647 // Buffer to store VDEnc frame statistics for lookahead BRC
3648 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcLooaheadStatsBufferSize, CODECHAL_PAGE_SIZE);
3649 allocParamsForBufferLinear.pBufName = "VDENC Lookahead Statistics Buffer";
3650
3651 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3652 m_osInterface,
3653 &allocParamsForBufferLinear,
3654 &m_vdencLaStatsBuffer),
3655 "Failed to create VDENC Lookahead Statistics Buffer");
3656
3657 CodechalVdencHevcLaStats *lookaheadInfo = (CodechalVdencHevcLaStats *)m_osInterface->pfnLockResource(
3658 m_osInterface,
3659 &m_vdencLaStatsBuffer,
3660 &lockFlagsWriteOnly);
3661 CODECHAL_ENCODE_CHK_NULL_RETURN(lookaheadInfo);
3662 MOS_ZeroMemory(lookaheadInfo, allocParamsForBufferLinear.dwBytes);
3663 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaStatsBuffer);
3664
3665 // Buffer to store lookahead output
3666 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcLooaheadDataBufferSize, CODECHAL_PAGE_SIZE);
3667 allocParamsForBufferLinear.pBufName = "VDENC Lookahead Data Buffer";
3668
3669 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3670 m_osInterface,
3671 &allocParamsForBufferLinear,
3672 &m_vdencLaDataBuffer),
3673 "Failed to create VDENC Lookahead Data Buffer");
3674
3675 CodechalVdencHevcLaData *lookaheadData = (CodechalVdencHevcLaData *)m_osInterface->pfnLockResource(
3676 m_osInterface,
3677 &m_vdencLaDataBuffer,
3678 &lockFlagsWriteOnly);
3679 CODECHAL_ENCODE_CHK_NULL_RETURN(lookaheadData);
3680 MOS_ZeroMemory(lookaheadData, allocParamsForBufferLinear.dwBytes);
3681 m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaDataBuffer);
3682
3683 return eStatus;
3684 }
3685
FreeBrcResources()3686 MOS_STATUS CodechalVdencHevcState::FreeBrcResources()
3687 {
3688 CODECHAL_ENCODE_FUNCTION_ENTER;
3689
3690 if (m_swBrcMode != nullptr)
3691 {
3692 m_osInterface->pfnFreeLibrary(m_swBrcMode);
3693 m_swBrcMode = nullptr;
3694 }
3695
3696 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
3697 {
3698 m_osInterface->pfnFreeResource(
3699 m_osInterface,
3700 &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]);
3701 }
3702
3703 m_osInterface->pfnFreeResource(m_osInterface, &m_dataFromPicsBuffer);
3704
3705 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
3706 {
3707 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencDeltaQpBuffer[k]);
3708
3709 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
3710 {
3711 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencReadBatchBuffer[k][i]);
3712 }
3713
3714 for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES_FOR_TILE_REPLAY; i++)
3715 {
3716 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[k][i]);
3717 }
3718
3719 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcInitDmemBuffer[k]);
3720 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcConstDataBuffer[k]);
3721
3722 for (auto i = 0; i < CODECHAL_LPLA_NUM_OF_PASSES; i++)
3723 {
3724 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[k][i]);
3725 }
3726 }
3727
3728 for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
3729 {
3730 Mhw_FreeBb(m_osInterface, &m_vdenc2ndLevelBatchBuffer[j], nullptr);
3731 }
3732
3733 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcHistoryBuffer);
3734 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcDbgBuffer);
3735 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencOutputROIStreaminBuffer);
3736 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaStatsBuffer);
3737 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaDataBuffer);
3738 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaInitDmemBuffer);
3739 m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaHistoryBuffer);
3740
3741 return MOS_STATUS_SUCCESS;
3742 }
3743
Initialize(CodechalSetting * settings)3744 MOS_STATUS CodechalVdencHevcState::Initialize(CodechalSetting * settings)
3745 {
3746 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3747
3748 CODECHAL_ENCODE_FUNCTION_ENTER;
3749
3750 // common initilization
3751 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::Initialize(settings));
3752
3753 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForRead = 0;
3754 //Reading buffer is with 2 frames late for BRC kernel uses the PAK statstic info of the frame before the previous frame
3755 m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
3756 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForRead + 2) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
3757
3758 uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
3759 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencStateCommandsDataSize(
3760 CODECHAL_ENCODE_MODE_HEVC,
3761 &vdencPictureStatesSize,
3762 &vdencPicturePatchListSize));
3763
3764 //the following code used to calculate ulMBCodeSize:
3765 //pakObjCmdStreamOutDataSize = 2*BYTES_PER_DWORD*(numOfLcu*NUM_PAK_DWS_PER_LCU + numOfLcu*maxNumOfCUperLCU*NUM_DWS_PER_CU); // Multiply by 2 for sideband
3766 //const uint32_t maxNumOfCUperLCU = (64/8)*(64/8);
3767 // NUM_PAK_DWS_PER_LCU 5
3768 // NUM_DWS_PER_CU 8
3769 uint32_t numOfLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE);
3770 m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * numOfLCU * (5 + 64 * 8), CODECHAL_PAGE_SIZE);
3771
3772 m_defaultPictureStatesSize += vdencPictureStatesSize;
3773 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
3774 m_extraPictureStatesSize += m_hwInterface->m_hucCommandBufferSize; // For slice size reporting, add the HuC copy commands
3775
3776 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
3777 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3778 MOS_UserFeature_ReadValue_ID(
3779 nullptr,
3780 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
3781 &userFeatureData,
3782 m_osInterface->pOsContext);
3783 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
3784
3785 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3786 MOS_UserFeature_ReadValue_ID(
3787 nullptr,
3788 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
3789 &userFeatureData,
3790 m_osInterface->pOsContext);
3791 m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
3792
3793 // Multi-Pass BRC
3794 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3795 MOS_UserFeature_ReadValue_ID(
3796 nullptr,
3797 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_MULTIPASS_BRC_ENABLE_ID,
3798 &userFeatureData,
3799 m_osInterface->pOsContext);
3800 m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
3801
3802 if (m_codecFunction != CODECHAL_FUNCTION_PAK)
3803 {
3804 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3805 userFeatureData.i32Data = 1;
3806 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
3807 MOS_UserFeature_ReadValue_ID(
3808 nullptr,
3809 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
3810 &userFeatureData,
3811 m_osInterface->pOsContext);
3812 m_hmeSupported = (userFeatureData.i32Data) ? true : false;
3813
3814 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3815 userFeatureData.i32Data = 1;
3816 userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
3817 MOS_UserFeature_ReadValue_ID(
3818 nullptr,
3819 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
3820 &userFeatureData,
3821 m_osInterface->pOsContext);
3822 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3823 }
3824
3825 if (m_codecFunction == CODECHAL_FUNCTION_ENC_VDENC_PAK)
3826 {
3827 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3828 MOS_UserFeature_ReadValue_ID(
3829 nullptr,
3830 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID,
3831 &userFeatureData,
3832 m_osInterface->pOsContext);
3833 m_hevcVdencAcqpEnabled = userFeatureData.i32Data ? true : false;
3834
3835 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3836 MOS_UserFeature_ReadValue_ID(
3837 nullptr,
3838 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_VQI_ENABLE_ID,
3839 &userFeatureData,
3840 m_osInterface->pOsContext);
3841 m_hevcVisualQualityImprovement = userFeatureData.i32Data ? true : false;
3842
3843 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3844 MOS_UserFeature_ReadValue_ID(
3845 nullptr,
3846 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ROUNDING_ENABLE_ID,
3847 &userFeatureData,
3848 m_osInterface->pOsContext);
3849 m_hevcVdencRoundingEnabled = userFeatureData.i32Data ? true : false;
3850
3851 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3852 MOS_UserFeature_ReadValue_ID(
3853 nullptr,
3854 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_PAKOBJCMD_STREAMOUT_ENABLE_ID,
3855 &userFeatureData,
3856 m_osInterface->pOsContext);
3857 m_vdencPakObjCmdStreamOutEnabled = userFeatureData.i32Data ? true : false;
3858
3859 #if (_DEBUG || _RELEASE_INTERNAL)
3860 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3861 MOS_UserFeature_ReadValue_ID(
3862 nullptr,
3863 __MEDIA_USER_FEATURE_VALUE_ENCODE_CQM_QP_THRESHOLD_ID,
3864 &userFeatureData,
3865 m_osInterface->pOsContext);
3866 m_cqmQpThreshold = (uint8_t)userFeatureData.u32Data;
3867 #endif
3868 }
3869
3870 m_minScaledDimension = CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE;
3871 m_minScaledDimensionInMb = (CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE + 15) >> 4;
3872
3873 if (m_frameWidth < 128 || m_frameHeight < 128)
3874 {
3875 m_16xMeSupported = false;
3876 m_32xMeSupported = false;
3877 }
3878
3879 else if (m_frameWidth < 512 || m_frameHeight < 512)
3880 {
3881 m_16xMeSupported = true;
3882 m_32xMeSupported = false;
3883 }
3884
3885 else
3886 {
3887 m_16xMeSupported = true;
3888 m_32xMeSupported = true;
3889 }
3890
3891 if (m_16xMeSupported)
3892 {
3893 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3894 MOS_UserFeature_ReadValue_ID(
3895 nullptr,
3896 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_16xME_ENABLE_ID,
3897 &userFeatureData,
3898 m_osInterface->pOsContext);
3899 m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3900 }
3901
3902 if (m_32xMeSupported)
3903 {
3904 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3905 MOS_UserFeature_ReadValue_ID(
3906 nullptr,
3907 __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_32xME_ENABLE_ID,
3908 &userFeatureData,
3909 m_osInterface->pOsContext);
3910 m_32xMeSupported = (userFeatureData.i32Data) ? true : false;
3911 }
3912
3913 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3914 MOS_UserFeature_ReadValue_ID(
3915 nullptr,
3916 __MEDIA_USER_FEATURE_VALUE_ENCODE_LA_SOFTWARE_ID,
3917 &userFeatureData,
3918 m_osInterface->pOsContext);
3919
3920 if (userFeatureData.i32Data)
3921 {
3922 MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
3923 char path_buffer[256];
3924 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3925 MOS_ZeroMemory(path_buffer, 256);
3926 userFeatureData.StringData.pStringData = path_buffer;
3927
3928 statusKey = MOS_UserFeature_ReadValue_ID(
3929 nullptr,
3930 __MEDIA_USER_FEATURE_VALUE_ENCODE_LA_SOFTWARE_PATH_ID,
3931 &userFeatureData,
3932 m_osInterface->pOsContext);
3933
3934 if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
3935 {
3936 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnLoadLibrary(m_osInterface, path_buffer, &m_swLaMode));
3937 }
3938 }
3939
3940 // SW LA DLL Reporting
3941 CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_LA_SOFTWARE_IN_USE_ID, (m_swLaMode == nullptr) ? false : true, m_osInterface->pOsContext);
3942
3943 return eStatus;
3944 }
3945
CodechalVdencHevcState(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)3946 CodechalVdencHevcState::CodechalVdencHevcState(
3947 CodechalHwInterface* hwInterface,
3948 CodechalDebugInterface* debugInterface,
3949 PCODECHAL_STANDARD_INFO standardInfo)
3950 :CodechalEncodeHevcBase(hwInterface, debugInterface, standardInfo)
3951 {
3952 m_fieldScalingOutputInterleaved = false;
3953 m_2xMeSupported = false;
3954 m_combinedDownScaleAndDepthConversion = false;
3955 m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
3956 m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
3957 m_vdencLaInitDmemBufferSize = sizeof(CodechalVdencHevcLaDmem);
3958 m_vdencLaUpdateDmemBufferSize = sizeof(CodechalVdencHevcLaDmem);
3959
3960 MOS_ZeroMemory(&m_sliceCountBuffer, sizeof(m_sliceCountBuffer));
3961 MOS_ZeroMemory(&m_vdencModeTimerBuffer, sizeof(m_vdencModeTimerBuffer));
3962
3963 MOS_ZeroMemory(&m_vdencBrcBuffers, sizeof(m_vdencBrcBuffers));
3964 MOS_ZeroMemory(&m_dataFromPicsBuffer, sizeof(m_dataFromPicsBuffer));
3965 MOS_ZeroMemory(&m_vdencDeltaQpBuffer, sizeof(m_vdencDeltaQpBuffer));
3966 MOS_ZeroMemory(&m_vdencOutputROIStreaminBuffer, sizeof(m_vdencOutputROIStreaminBuffer));
3967 MOS_ZeroMemory(m_vdencBrcUpdateDmemBuffer, sizeof(m_vdencBrcUpdateDmemBuffer));
3968 MOS_ZeroMemory(&m_vdencBrcInitDmemBuffer, sizeof(m_vdencBrcInitDmemBuffer));
3969 MOS_ZeroMemory(&m_vdencBrcConstDataBuffer, sizeof(m_vdencBrcConstDataBuffer));
3970 MOS_ZeroMemory(&m_vdencBrcHistoryBuffer, sizeof(m_vdencBrcHistoryBuffer));
3971 MOS_ZeroMemory(&m_vdencReadBatchBuffer, sizeof(m_vdencReadBatchBuffer));
3972 MOS_ZeroMemory(&m_vdencReadBatchBuffer, sizeof(m_vdencGroup3BatchBuffer));
3973 MOS_ZeroMemory(&m_vdencBrcDbgBuffer, sizeof(m_vdencBrcDbgBuffer));
3974 MOS_ZeroMemory(&m_vdenc2ndLevelBatchBuffer, sizeof(m_vdenc2ndLevelBatchBuffer));
3975 MOS_ZeroMemory(m_resSliceReport, sizeof(m_resSliceReport));
3976 MOS_ZeroMemory(&m_vdencLaStatsBuffer, sizeof(m_vdencLaStatsBuffer));
3977 MOS_ZeroMemory(&m_vdencLaDataBuffer, sizeof(m_vdencLaDataBuffer));
3978
3979 }
3980
StoreHucErrorStatus(MmioRegistersHuc * mmioRegisters,PMOS_COMMAND_BUFFER cmdBuffer,bool addToEncodeStatus)3981 MOS_STATUS CodechalVdencHevcState::StoreHucErrorStatus(MmioRegistersHuc* mmioRegisters, PMOS_COMMAND_BUFFER cmdBuffer, bool addToEncodeStatus)
3982 {
3983 // Write Huc Error Flag mask: DW1 (mask value)
3984 MHW_MI_STORE_DATA_PARAMS storeDataParams;
3985 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3986 storeDataParams.pOsResource = &m_resHucErrorStatusBuffer;
3987 storeDataParams.dwResourceOffset = sizeof(uint32_t);
3988 storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_MEMORY_ACCESS_ERROR_MASK;
3989 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
3990
3991 // store HUC_STATUS register: DW0 (actual value)
3992 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
3993 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3994 storeRegParams.presStoreBuffer = &m_resHucErrorStatusBuffer;
3995 storeRegParams.dwOffset = 0;
3996 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
3997 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
3998
3999 if(addToEncodeStatus)
4000 {
4001 EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
4002
4003 uint32_t baseOffset =
4004 (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource
4005
4006 // store HUC_STATUS register
4007 MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
4008 MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
4009 storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
4010 storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
4011 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
4012 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
4013 cmdBuffer,
4014 &storeRegParams));
4015 }
4016
4017 return MOS_STATUS_SUCCESS;
4018 }
4019
PrepareHWMetaData(PMOS_COMMAND_BUFFER cmdBuffer)4020 MOS_STATUS CodechalVdencHevcState::PrepareHWMetaData(PMOS_COMMAND_BUFFER cmdBuffer)
4021 {
4022 CODECHAL_ENCODE_FUNCTION_ENTER;
4023 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4024
4025 if (!m_presMetadataBuffer)
4026 {
4027 return MOS_STATUS_SUCCESS;
4028 }
4029
4030 // Intra/Inter/Skip CU Cnt /SubregionSliceSizeCalc
4031 auto xCalAtomic = [&](PMOS_RESOURCE presDst, uint32_t dstOffset, PMOS_RESOURCE presSrc, uint32_t srcOffset, MHW_COMMON_MI_ATOMIC_OPCODE opCode) {
4032 auto mmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex);
4033 MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
4034 MHW_MI_FLUSH_DW_PARAMS flushDwParams;
4035 MHW_MI_ATOMIC_PARAMS atomicParams;
4036
4037 MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
4038 MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
4039 MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
4040
4041 miLoadRegMemParams.presStoreBuffer = presSrc;
4042 miLoadRegMemParams.dwOffset = srcOffset;
4043 miLoadRegMemParams.dwRegister = mmioRegistersMfx->generalPurposeRegister0LoOffset;
4044 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
4045
4046 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
4047
4048 atomicParams.pOsResource = presDst;
4049 atomicParams.dwResourceOffset = dstOffset;
4050 atomicParams.dwDataSize = sizeof(uint32_t);
4051 atomicParams.Operation = opCode;
4052 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(cmdBuffer, &atomicParams));
4053
4054 return MOS_STATUS_SUCCESS;
4055 };
4056
4057 MHW_MI_STORE_DATA_PARAMS storeDataParams;
4058 MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
4059 storeDataParams.pOsResource = m_presMetadataBuffer;
4060 storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeErrorFlags;
4061 storeDataParams.dwValue = 0; // No error
4062 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4063
4064 storeDataParams.dwResourceOffset = m_metaDataOffset.dwWrittenSubregionsCount;
4065 storeDataParams.dwValue = m_numSlices;
4066 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4067
4068 MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
4069 MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
4070 for (uint16_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4071 {
4072 uint32_t subRegionSartOffset = m_metaDataOffset.dwMetaDataSize + slcCount*m_metaDataOffset.dwMetaDataSubRegionSize;
4073
4074 storeDataParams.dwResourceOffset = subRegionSartOffset + m_metaDataOffset.dwbStartOffset;
4075 storeDataParams.dwValue = 0;
4076 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4077
4078 storeDataParams.dwResourceOffset = subRegionSartOffset + m_metaDataOffset.dwbHeaderSize;
4079 storeDataParams.dwValue = m_slcData[slcCount].BitSize;
4080 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4081
4082 miCpyMemMemParams.presSrc = &m_resLcuBaseAddressBuffer;
4083 miCpyMemMemParams.presDst = m_presMetadataBuffer;
4084 miCpyMemMemParams.dwSrcOffset = slcCount * 16 * sizeof(uint32_t); //slice size offset in resLcuBaseAddressBuffer is 16DW
4085 miCpyMemMemParams.dwDstOffset = subRegionSartOffset + m_metaDataOffset.dwbSize;
4086 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4087 if (slcCount)
4088 {
4089 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, subRegionSartOffset + m_metaDataOffset.dwbSize,
4090 &m_resLcuBaseAddressBuffer, (slcCount - 1) * 16 * sizeof(uint32_t), MHW_MI_ATOMIC_SUB));
4091 }
4092 }
4093 auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
4094 CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
4095 MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
4096 MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
4097 miStoreRegMemParams.presStoreBuffer = m_presMetadataBuffer;
4098 miStoreRegMemParams.dwOffset = m_metaDataOffset.dwEncodedBitstreamWrittenBytesCount;
4099 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
4100 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
4101
4102 // Statistics
4103 // Average QP
4104 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
4105 {
4106 storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageQP;
4107 storeDataParams.dwValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4108 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4109 }
4110 else
4111 {
4112 miCpyMemMemParams.presSrc = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource;
4113 miCpyMemMemParams.dwSrcOffset = 0x6F * sizeof(uint32_t);
4114 miCpyMemMemParams.presDst = m_presMetadataBuffer;
4115 miCpyMemMemParams.dwDstOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageQP;
4116 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4117
4118 MHW_MI_ATOMIC_PARAMS atomicParams;
4119 MOS_ZeroMemory((&atomicParams), sizeof(atomicParams));
4120 atomicParams.pOsResource = m_presMetadataBuffer;
4121 atomicParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageQP;
4122 atomicParams.dwDataSize = sizeof(uint32_t);
4123 atomicParams.Operation = MHW_MI_ATOMIC_AND;
4124 atomicParams.bInlineData = true;
4125 atomicParams.dwOperand1Data[0] = 0xFF;
4126 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(cmdBuffer, &atomicParams));
4127 }
4128
4129 // LCUSkipIn8x8Unit
4130 miCpyMemMemParams.presSrc = &m_resFrameStatStreamOutBuffer;
4131 miCpyMemMemParams.dwSrcOffset = 7 * sizeof(uint32_t);
4132 miCpyMemMemParams.presDst = m_presMetadataBuffer;
4133 miCpyMemMemParams.dwDstOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount;
4134 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4135 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount,
4136 &m_resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4137 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount,
4138 &m_resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4139 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount,
4140 &m_resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4141
4142 // NumCU_IntraDC, NumCU_IntraPlanar, NumCU_IntraAngular
4143 miCpyMemMemParams.presSrc = &m_resFrameStatStreamOutBuffer;
4144 miCpyMemMemParams.dwSrcOffset = 20 * sizeof(uint32_t);
4145 miCpyMemMemParams.dwDstOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwIntraCodingUnitsCount;
4146 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4147 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwIntraCodingUnitsCount,
4148 &m_resFrameStatStreamOutBuffer, 21 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4149 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwIntraCodingUnitsCount,
4150 &m_resFrameStatStreamOutBuffer, 22 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4151
4152 //NumCU_Merge (LCUSkipIn8x8Unit), NumCU_MVdirL0, NumCU_MVdirL1, NumCU_MVdirBi
4153 miCpyMemMemParams.presSrc = &m_resFrameStatStreamOutBuffer;
4154 miCpyMemMemParams.dwSrcOffset = 27 * sizeof(uint32_t);
4155 miCpyMemMemParams.dwDstOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount;
4156 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4157 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4158 &m_resFrameStatStreamOutBuffer, 28 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4159 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4160 &m_resFrameStatStreamOutBuffer, 29 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4161 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4162 &m_resFrameStatStreamOutBuffer, 30 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4163 CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4164 m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount, MHW_MI_ATOMIC_SUB));
4165
4166 // Average MV_X/MV_Y, report (0,0) as temp solution, later may need kernel involved
4167 storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageMotionEstimationXDirection;
4168 storeDataParams.dwValue = 0;
4169 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4170
4171 storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageMotionEstimationYDirection;
4172 storeDataParams.dwValue = 0;
4173 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4174
4175 return eStatus;
4176 }
4177
SetupForceIntraStreamIn(PMOS_RESOURCE streamIn)4178 MOS_STATUS CodechalVdencHevcState::SetupForceIntraStreamIn(PMOS_RESOURCE streamIn)
4179 {
4180 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4181
4182 CODECHAL_ENCODE_FUNCTION_ENTER;
4183
4184 CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
4185
4186 MOS_LOCK_PARAMS lockFlags;
4187 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4188 lockFlags.WriteOnly = true;
4189
4190 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
4191 m_osInterface,
4192 streamIn,
4193 &lockFlags);
4194 CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4195
4196 MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
4197 uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
4198 uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
4199
4200 if (m_lookaheadPass)
4201 {
4202 // lookahead pass should lower QP by 2 to encode force intra frame.
4203 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
4204 streaminDataParams.setQpRoiCtrl = true;
4205 streaminDataParams.forceQp[0] = m_hevcPicParams->QpY - 2;
4206 streaminDataParams.forceQp[1] = m_hevcPicParams->QpY - 2;
4207 streaminDataParams.forceQp[2] = m_hevcPicParams->QpY - 2;
4208 streaminDataParams.forceQp[3] = m_hevcPicParams->QpY - 2;
4209 SetStreaminDataPerRegion(streamInWidth, 0, streamInHeight, 0, streamInWidth, &streaminDataParams, data);
4210 }
4211
4212 MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
4213 streaminDataParams.puTypeCtrl = 1; //force intra
4214 streaminDataParams.maxTuSize = 3;
4215 streaminDataParams.maxCuSize = 3;
4216 switch (m_hevcSeqParams->TargetUsage)
4217 {
4218 case 1:
4219 case 4:
4220 streaminDataParams.numMergeCandidateCu64x64 = 4;
4221 streaminDataParams.numMergeCandidateCu32x32 = 3;
4222 streaminDataParams.numMergeCandidateCu16x16 = 2;
4223 streaminDataParams.numMergeCandidateCu8x8 = 1;
4224 streaminDataParams.numImePredictors = m_imgStateImePredictors;
4225 break;
4226 case 7:
4227 streaminDataParams.numMergeCandidateCu64x64 = 2;
4228 streaminDataParams.numMergeCandidateCu32x32 = 2;
4229 streaminDataParams.numMergeCandidateCu16x16 = 2;
4230 streaminDataParams.numMergeCandidateCu8x8 = 0;
4231 streaminDataParams.numImePredictors = 4;
4232 break;
4233 }
4234
4235 uint32_t streamInNumCUs = streamInWidth * streamInHeight;
4236 for (uint32_t i = 0; i < streamInNumCUs; i++)
4237 {
4238 SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
4239 }
4240
4241 m_osInterface->pfnUnlockResource(
4242 m_osInterface,
4243 streamIn);
4244
4245 return eStatus;
4246 }
4247
4248 #if USE_CODECHAL_DEBUG_TOOL
DumpHucBrcInit()4249 MOS_STATUS CodechalVdencHevcState::DumpHucBrcInit()
4250 {
4251 CODECHAL_ENCODE_FUNCTION_ENTER;
4252 int32_t currentPass = GetCurrentPass();
4253 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
4254 &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx],
4255 m_vdencBrcInitDmemBufferSize,
4256 currentPass,
4257 hucRegionDumpInit));
4258
4259 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4260 &m_vdencBrcHistoryBuffer,
4261 0,
4262 CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
4263 0,
4264 "_History",
4265 true,
4266 currentPass,
4267 hucRegionDumpInit));
4268 return MOS_STATUS_SUCCESS;
4269 }
4270
DumpHucBrcUpdate(bool isInput)4271 MOS_STATUS CodechalVdencHevcState::DumpHucBrcUpdate(bool isInput)
4272 {
4273 CODECHAL_ENCODE_FUNCTION_ENTER;
4274 int32_t currentPass = GetCurrentPass();
4275 if (isInput)
4276 {
4277 //Dump HucBrcUpdate input buffers
4278 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
4279 &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
4280 m_vdencBrcUpdateDmemBufferSize,
4281 currentPass,
4282 hucRegionDumpUpdate));
4283
4284 // Region 1 - VDENC Statistics Buffer dump
4285 auto vdencStatusBuffer = m_virtualAddrParams.regionParams[1].presRegion;
4286 auto vdencStatusOffset = m_virtualAddrParams.regionParams[1].dwOffset;
4287 if (vdencStatusBuffer)
4288 {
4289 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4290 vdencStatusBuffer,
4291 vdencStatusOffset,
4292 m_vdencBrcStatsBufferSize,
4293 1,
4294 "_VdencStats",
4295 true,
4296 currentPass,
4297 hucRegionDumpUpdate));
4298 }
4299
4300 // Region 2 - PAK Statistics Buffer dump
4301 auto frameStatStreamOutBuffer = m_virtualAddrParams.regionParams[2].presRegion;
4302 auto frameStatStreamOutOffset = m_virtualAddrParams.regionParams[2].dwOffset;
4303 if (frameStatStreamOutBuffer)
4304 {
4305 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4306 frameStatStreamOutBuffer,
4307 frameStatStreamOutOffset,
4308 m_vdencBrcPakStatsBufferSize,
4309 2,
4310 "_PakStats",
4311 true,
4312 currentPass,
4313 hucRegionDumpUpdate));
4314 }
4315
4316 // Region 3 - Input SLB Buffer
4317 auto vdencReadBatchBuffer = m_virtualAddrParams.regionParams[3].presRegion;
4318 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4319 vdencReadBatchBuffer,
4320 0,
4321 m_hwInterface->m_vdencReadBatchBufferSize,
4322 3,
4323 "_Slb",
4324 true,
4325 currentPass,
4326 hucRegionDumpUpdate));
4327
4328 // Region 4 - Constant Data Buffer dump
4329 auto vdencBrcConstDataBuffer = m_virtualAddrParams.regionParams[4].presRegion;
4330 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4331 vdencBrcConstDataBuffer,
4332 0,
4333 m_vdencBrcConstDataBufferSize,
4334 4,
4335 "_ConstData",
4336 true,
4337 currentPass,
4338 hucRegionDumpUpdate));
4339
4340 // Region 7 - Slice Stat Streamout (Input)
4341 auto lucBasedAddressBuffer = m_virtualAddrParams.regionParams[7].presRegion;
4342 auto lucBasedAddressOffset = m_virtualAddrParams.regionParams[7].dwOffset;
4343 if (lucBasedAddressBuffer)
4344 {
4345 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4346 lucBasedAddressBuffer,
4347 lucBasedAddressOffset,
4348 CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE,
4349 7,
4350 "_SliceStat",
4351 true,
4352 currentPass,
4353 hucRegionDumpUpdate));
4354 }
4355
4356 // Region 8 - PAK MMIO Buffer dump
4357 auto pakInfoBufffer = m_virtualAddrParams.regionParams[8].presRegion;
4358 if (pakInfoBufffer)
4359 {
4360 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4361 pakInfoBufffer,
4362 0,
4363 sizeof(CodechalVdencHevcPakInfo),
4364 8,
4365 "_PakMmio",
4366 true,
4367 currentPass,
4368 hucRegionDumpUpdate));
4369 }
4370
4371 // Region 9 - Streamin Buffer for ROI (Input)
4372 auto streamInBufferSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
4373 auto stramInBuffer = m_virtualAddrParams.regionParams[9].presRegion;
4374 if (stramInBuffer)
4375 {
4376 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4377 stramInBuffer,
4378 0,
4379 streamInBufferSize,
4380 9,
4381 "_RoiStreamin",
4382 true,
4383 currentPass,
4384 hucRegionDumpUpdate));
4385 }
4386
4387 // Region 10 - Delta QP for ROI Buffer
4388 auto vdencDeltaQpBuffer = m_virtualAddrParams.regionParams[10].presRegion;
4389 if (vdencDeltaQpBuffer)
4390 {
4391 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4392 vdencDeltaQpBuffer,
4393 0,
4394 m_deltaQpRoiBufferSize,
4395 10,
4396 "_DeltaQp",
4397 true,
4398 currentPass,
4399 hucRegionDumpUpdate));
4400 }
4401
4402 // Region 12 - Input SLB Buffer
4403 auto slbBuffer = m_virtualAddrParams.regionParams[12].presRegion;
4404 if (slbBuffer)
4405 {
4406 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4407 slbBuffer,
4408 0,
4409 m_hwInterface->m_vdencGroup3BatchBufferSize,
4410 12,
4411 "_Slb",
4412 true,
4413 currentPass,
4414 hucRegionDumpUpdate));
4415 }
4416 }
4417 else
4418 {
4419 // Region 5 - Output SLB Buffer
4420 auto vdenc2ndLevelBatchBuffer = m_virtualAddrParams.regionParams[5].presRegion;
4421 if (vdenc2ndLevelBatchBuffer)
4422 {
4423 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4424 vdenc2ndLevelBatchBuffer,
4425 0,
4426 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
4427 5,
4428 "_Slb",
4429 false,
4430 currentPass,
4431 hucRegionDumpUpdate));
4432 }
4433
4434 // Region 11 - Output ROI Streamin Buffer
4435 auto vdencOutputROIStreaminBuffer = m_virtualAddrParams.regionParams[11].presRegion;
4436 if (vdencOutputROIStreaminBuffer)
4437 {
4438 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4439 vdencOutputROIStreaminBuffer,
4440 0,
4441 m_roiStreamInBufferSize,
4442 11,
4443 "_RoiStreamin",
4444 false,
4445 currentPass,
4446 hucRegionDumpUpdate));
4447 }
4448 }
4449
4450 // Region 0 - History Buffer dump (Input/Output)
4451 auto vdencBrcHistoryBuffer = m_virtualAddrParams.regionParams[0].presRegion;
4452 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4453 vdencBrcHistoryBuffer,
4454 0,
4455 m_brcHistoryBufSize,
4456 0,
4457 "_History",
4458 isInput,
4459 currentPass,
4460 hucRegionDumpUpdate));
4461
4462 // Region 6 - Data from Pictures for Weighted Prediction (Input/Output)
4463 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4464 &m_dataFromPicsBuffer,
4465 0,
4466 CODECHAL_PAGE_SIZE * 4,
4467 6,
4468 "_PicsData",
4469 isInput,
4470 currentPass,
4471 hucRegionDumpUpdate));
4472
4473 // Region 15 - Debug Output
4474 auto debugBuffer = m_virtualAddrParams.regionParams[15].presRegion;
4475 if (debugBuffer)
4476 {
4477 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4478 debugBuffer,
4479 0,
4480 0x1000,
4481 15,
4482 "_Debug",
4483 isInput,
4484 currentPass,
4485 hucRegionDumpUpdate));
4486 }
4487 return MOS_STATUS_SUCCESS;
4488 }
4489
DumpVdencOutputs()4490 MOS_STATUS CodechalVdencHevcState::DumpVdencOutputs()
4491 {
4492 CODECHAL_ENCODE_FUNCTION_ENTER;
4493
4494 // Dump VDENC Stats Buffer
4495 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4496 m_resVdencStatsBuffer,
4497 CodechalDbgAttr::attrVdencOutput,
4498 "_Stats",
4499 m_vdencBrcStatsBufferSize,
4500 0,
4501 CODECHAL_NUM_MEDIA_STATES));
4502
4503 // Dump PAK Stats Buffer
4504 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4505 m_resPakStatsBuffer,
4506 CodechalDbgAttr::attrVdencOutput,
4507 "_PakStats",
4508 m_vdencBrcPakStatsBufferSize,
4509 0,
4510 CODECHAL_NUM_MEDIA_STATES));
4511
4512 // Dump PAK MMIO Buffer
4513 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4514 &m_resPakMmioBuffer,
4515 CodechalDbgKernel::kernelBrcUpdate,
4516 m_currPass ? "_MmioReg_Output_Pass1" : "_MmioReg_Output_Pass0",
4517 sizeof(VdencBrcPakMmio),
4518 0,
4519 CODECHAL_NUM_MEDIA_STATES));
4520
4521 // Dump PAK Obj Cmd Buffer
4522 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4523 m_resVdencPakObjCmdStreamOutBuffer,
4524 CodechalDbgAttr::attrVdencOutput,
4525 "_MbCode",
4526 m_mvOffset,
4527 0,
4528 CODECHAL_NUM_MEDIA_STATES));
4529
4530 // Dump CU Record Cmd Buffer
4531 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4532 m_resVdencPakObjCmdStreamOutBuffer,
4533 CodechalDbgAttr::attrVdencOutput,
4534 "_CURecord",
4535 m_mbCodeSize - m_mvOffset,
4536 m_mvOffset,
4537 CODECHAL_NUM_MEDIA_STATES));
4538
4539 // Slice Size Conformance
4540 if (m_hevcSeqParams->SliceSizeControl)
4541 {
4542 uint32_t dwSize = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6*CODECHAL_CACHELINE_SIZE;
4543 if (!m_hevcPicParams->tiles_enabled_flag)
4544 {
4545 // Slice Size StreamOut Surface
4546 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4547 &m_resLcuBaseAddressBuffer,
4548 CodechalDbgAttr::attrVdencOutput,
4549 "_SliceSize",
4550 dwSize,
4551 0,
4552 CODECHAL_NUM_MEDIA_STATES));
4553 }
4554
4555 dwSize = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
4556 // Slice Count buffer 1 DW = 4 Bytes
4557 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4558 m_resSliceCountBuffer,
4559 CodechalDbgAttr::attrVdencOutput,
4560 "_SliceCount",
4561 dwSize,
4562 0,
4563 CODECHAL_NUM_MEDIA_STATES));
4564
4565 // VDEncMode Timer buffer 1 DW = 4 Bytes
4566 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4567 m_resVdencModeTimerBuffer,
4568 CodechalDbgAttr::attrVdencOutput,
4569 "_ModeTimer",
4570 dwSize,
4571 0,
4572 CODECHAL_NUM_MEDIA_STATES));
4573 }
4574
4575 return MOS_STATUS_SUCCESS;
4576 }
4577
ModifyEncodedFrameSizeWithFakeHeaderSize(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t fakeHeaderSizeInByte,PMOS_RESOURCE resBrcUpdateCurbe,uint32_t targetSizePos,PMOS_RESOURCE resPakStat,uint32_t slcHrdSizePos)4578 MOS_STATUS CodechalVdencHevcState::ModifyEncodedFrameSizeWithFakeHeaderSize(
4579 PMOS_COMMAND_BUFFER cmdBuffer,
4580 uint32_t fakeHeaderSizeInByte,
4581 PMOS_RESOURCE resBrcUpdateCurbe,
4582 uint32_t targetSizePos,
4583 PMOS_RESOURCE resPakStat,
4584 uint32_t slcHrdSizePos
4585 )
4586 {
4587 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4588
4589 CODECHAL_ENCODE_FUNCTION_ENTER;
4590
4591 //calculate slice headers size
4592 PCODEC_ENCODER_SLCDATA slcData = m_slcData;
4593 CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
4594 uint32_t totalSliceHeaderSize = 0;
4595 for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4596 {
4597 totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
4598 slcData++;
4599 }
4600
4601 uint32_t firstHdrSz = 0;
4602 for (uint32_t i = 0; i < m_encodeParams.uiNumNalUnits; i++)
4603 {
4604 firstHdrSz += m_encodeParams.ppNALUnitParams[i]->uiSize;
4605 }
4606
4607 totalSliceHeaderSize += firstHdrSz;
4608
4609 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddBufferWithIMMValue(
4610 cmdBuffer,
4611 resBrcUpdateCurbe,
4612 targetSizePos,
4613 fakeHeaderSizeInByte - totalSliceHeaderSize,
4614 true));
4615
4616 CODECHAL_ENCODE_CHK_STATUS_RETURN(AddBufferWithIMMValue(
4617 cmdBuffer,
4618 resPakStat,
4619 slcHrdSizePos,
4620 fakeHeaderSizeInByte * 8,
4621 true));
4622
4623 return eStatus;
4624 }
4625 #endif
4626