1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_vdenc_hevc.cpp
24 //! \brief    Defines base class for HEVC VDEnc encoder.
25 //!
26 
27 #include "codechal_vdenc_hevc.h"
28 #if USE_CODECHAL_DEBUG_TOOL
29 #include "codechal_debug_kernel.h"
30 #endif
31 
32 //!< \cond SKIP_DOXYGEN
33 const uint8_t CodechalVdencHevcState::m_estRateThreshP0[7] =
34 {
35     4, 8, 12, 16, 20, 24, 28
36 };
37 
38 const uint8_t CodechalVdencHevcState::m_estRateThreshB0[7] =
39 {
40     4, 8, 12, 16, 20, 24, 28
41 };
42 
43 const uint8_t CodechalVdencHevcState::m_estRateThreshI0[7] =
44 {
45     4, 8, 12, 16, 20, 24, 28
46 };
47 
48 const int8_t CodechalVdencHevcState::m_instRateThreshP0[4] =
49 {
50     40, 60, 80, 120
51 };
52 
53 const int8_t CodechalVdencHevcState::m_instRateThreshB0[4] =
54 {
55     35, 60, 80, 120
56 };
57 
58 const int8_t CodechalVdencHevcState::m_instRateThreshI0[4] =
59 {
60     40, 60, 90, 115
61 };
62 
63 const uint16_t CodechalVdencHevcState::m_startGAdjFrame[4] =
64 {
65     10, 50, 100, 150
66 };
67 
68 const uint8_t CodechalVdencHevcState::m_startGAdjMult[5] =
69 {
70     1, 1, 3, 2, 1
71 };
72 
73 const uint8_t CodechalVdencHevcState::m_startGAdjDiv[5] =
74 {
75     40, 5, 5, 3, 1
76 };
77 
78 const uint8_t CodechalVdencHevcState::m_rateRatioThreshold[7] =
79 {
80     40, 75, 97, 103, 125, 160, 0
81 };
82 
83 const uint8_t CodechalVdencHevcState::m_rateRatioThresholdQP[8] =
84 {
85     253, 254, 255, 0, 1, 2, 3, 0
86 };
87 
88 const uint32_t CodechalVdencHevcState::m_hucModeCostsIFrame[] = {
89     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
90     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
91     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
92     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
93     0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
94     0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
95     0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
96     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
97     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
98     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
99     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
100     0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
101     0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
102     0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
103     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
104     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
105     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
106     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
107     0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
108     0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
109     0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
110     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
111     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
112     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
113     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
114     0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
115     0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
116     0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
117     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
118     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
119     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
120     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
121     0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
122     0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
123     0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
124     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
125     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
126     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
127     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000,
128     0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000,
129     0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
130     0x00000000, 0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
131     0x0d0e101e, 0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e,
132     0x00320707, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707,
133     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0d0e101e, 0x00320707, 0x00000000,
134     0x00000000, 0x00000000, 0x00000000, 0x00000000
135 };
136 
137 const uint32_t CodechalVdencHevcState::m_hucModeCostsPbFrame[] = {
138     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
139     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
140     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
141     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
142     0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
143     0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
144     0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
145     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
146     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
147     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
148     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
149     0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
150     0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
151     0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
152     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
153     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
154     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
155     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
156     0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
157     0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
158     0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
159     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
160     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
161     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
162     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
163     0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
164     0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
165     0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
166     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
167     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
168     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
169     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
170     0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
171     0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
172     0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
173     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
174     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
175     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
176     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15,
177     0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d,
178     0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333,
179     0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b,
180     0x0d0e101e, 0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e,
181     0x44320707, 0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707,
182     0x15232314, 0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b, 0x0d0e101e, 0x44320707, 0x15232314,
183     0x6e4d3f15, 0x04476e4d, 0x1f232333, 0x0f13131b
184 };
185 
186 const uint16_t CodechalVdencHevcState::m_sadQpLambdaI[] = {
187     0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004,
188     0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000E, 0x0010, 0x0012, 0x0014, 0x0016, 0x0019, 0x001C,
189     0x001F, 0x0023, 0x0027, 0x002C, 0x0032, 0x0038, 0x003E, 0x0046, 0x004F, 0x0058, 0x0063, 0x006F, 0x007D, 0x008C, 0x009D, 0x00B1,
190     0x00C6, 0x00DF, 0x00FA, 0x0118
191 };
192 
193 // new table for visual quality improvement
194 const uint16_t CodechalVdencHevcState::m_sadQpLambdaI_VQI[] = {
195     0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004,
196     0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D, 0x000F, 0x0011, 0x0014, 0x0017, 0x001A, 0x001E, 0x0022,
197     0x0027, 0x002D, 0x0033, 0x003B, 0x0043, 0x004D, 0x0057, 0x0064, 0x0072, 0x0082, 0x0095, 0x00A7, 0x00BB, 0x00D2, 0x00EC, 0x0109,
198     0x0129, 0x014E, 0x0177, 0x01A5
199 };
200 
201 const uint16_t CodechalVdencHevcState::m_sadQpLambdaP[] = {
202     0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0004, 0x0005,
203     0x0005, 0x0006, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D, 0x000E, 0x0010, 0x0012, 0x0014, 0x0017, 0x001A, 0x001D,
204     0x0021, 0x0024, 0x0029, 0x002E, 0x0034, 0x003A, 0x0041, 0x0049, 0x0052, 0x005C, 0x0067, 0x0074, 0x0082, 0x0092, 0x00A4, 0x00B8,
205     0x00CE, 0x00E8, 0x0104, 0x0124
206 };
207 
208 const uint16_t CodechalVdencHevcState::m_rdQpLambdaI[] = {
209     0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0003, 0x0004, 0x0005,
210     0x0006, 0x0008, 0x000A, 0x000C, 0x000F, 0x0013, 0x0018, 0x001E, 0x0026, 0x0030, 0x003D, 0x004D, 0x0061, 0x007A, 0x009A, 0x00C2,
211     0x00F4, 0x0133, 0x0183, 0x01E8, 0x0266, 0x0306, 0x03CF, 0x04CD, 0x060C, 0x079F, 0x099A, 0x0C18, 0x0F3D, 0x1333, 0x1831, 0x1E7A,
212     0x2666, 0x3062, 0x3CF5, 0x4CCD
213 };
214 
215 const uint16_t CodechalVdencHevcState::m_rdQpLambdaP[] = {
216     0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0004, 0x0005,
217     0x0007, 0x0008, 0x000A, 0x000D, 0x0011, 0x0015, 0x001A, 0x0021, 0x002A, 0x0034, 0x0042, 0x0053, 0x0069, 0x0084, 0x00A6, 0x00D2,
218     0x0108, 0x014D, 0x01A3, 0x0210, 0x029A, 0x0347, 0x0421, 0x0533, 0x068D, 0x0841, 0x0A66, 0x0D1A, 0x1082, 0x14CD, 0x1A35, 0x2105,
219     0x299A, 0x346A, 0x4209, 0x5333
220 };
221 
222 // Originial CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode table
223 const uint8_t CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode[] = {
224     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
225     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
226     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
227     0x00, 0x00, 0x00, 0x00
228 };
229 
230 // New table for visual quality improvement
231 const uint8_t CodechalVdencHevcState::m_penaltyForIntraNonDC32x32PredMode_VQI[] = {
232     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
233     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x0C, 0x12, 0x19, 0x1f, 0x25, 0x2C, 0x32, 0x38,
234     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
235     0x3F, 0x3F, 0x3F, 0x3F
236 };
237 //! \endcond
238 
GetMaxAllowedSlices(uint8_t levelIdc)239 uint32_t CodechalVdencHevcState::GetMaxAllowedSlices(uint8_t levelIdc)
240 {
241     uint32_t maxAllowedNumSlices = 0;
242 
243     switch (levelIdc)
244     {
245     case 10:
246     case 20:
247         maxAllowedNumSlices = 16;
248         break;
249     case 21:
250         maxAllowedNumSlices = 20;
251         break;
252     case 30:
253         maxAllowedNumSlices = 30;
254         break;
255     case 31:
256         maxAllowedNumSlices = 40;
257         break;
258     case 40:
259     case 41:
260         maxAllowedNumSlices = 75;
261         break;
262     case 50:
263     case 51:
264     case 52:
265         maxAllowedNumSlices = 200;
266         break;
267     case 60:
268     case 61:
269     case 62:
270         maxAllowedNumSlices = 600;
271         break;
272     default:
273         maxAllowedNumSlices = 0;
274         break;
275     }
276 
277     return maxAllowedNumSlices;
278 }
279 
SetPakPassType()280 void CodechalVdencHevcState::SetPakPassType()
281 {
282     CODECHAL_ENCODE_FUNCTION_ENTER;
283 
284     // default: VDEnc+PAK pass
285     m_pakOnlyPass = false;
286 
287     // BRC
288     if (m_brcEnabled)
289     {
290         // BRC with SSC, BRC without SSC
291         // BRC fast 2nd pass needed, but weighted prediction/SSC 2nd pass not needed
292         // HuC will update PAK pass type to be VDEnc+PAK if WP/SSC 2nd pass is needed
293         if (GetCurrentPass() == 1)
294         {
295             m_pakOnlyPass = true;
296         }
297     }
298 
299     // CQP, ACQP, BRC
300     if (m_hevcSeqParams->SAO_enabled_flag)
301     {
302         // SAO 2nd pass is always PAK only pass
303         if (m_b2NdSaoPassNeeded && (GetCurrentPass() == m_uc2NdSaoPass))
304         {
305             m_pakOnlyPass = true;
306         }
307     }
308 
309     return;
310 }
311 
ComputeVDEncInitQP(int32_t & initQPIP,int32_t & initQPB)312 void CodechalVdencHevcState::ComputeVDEncInitQP(int32_t& initQPIP, int32_t& initQPB)
313 {
314     CODECHAL_ENCODE_FUNCTION_ENTER;
315 
316     const float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
317     uint32_t frameSize = ((m_frameWidth * m_frameHeight * 3) >> 1);
318 
319     initQPIP = (int)(1. / 1.2 * pow(10.0, (log10(frameSize * 2. / 3. * ((float)m_hevcSeqParams->FrameRate.Numerator / ((float)m_hevcSeqParams->FrameRate.Denominator * (float)m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS))) - x0) * (y1 - y0) / (x1 - x0) + y0) + 0.5);
320 
321     initQPIP += 2;
322 
323     int32_t gopP    = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
324     int32_t gopB    = m_hevcSeqParams->GopPicSize - 1 - gopP;
325     int32_t gopB1 = 0;
326     int32_t gopB2 = 0;
327     int32_t gopSize = 1 + gopP + gopB + gopB1 + gopB2;
328 
329     if (gopSize == 1)
330     {
331         initQPIP += 12;
332     }
333     else if (gopSize < 15)
334     {
335         initQPIP += ((14 - gopSize) >> 1);
336     }
337 
338     initQPIP = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPIP);
339     initQPIP--;
340 
341     if (initQPIP < 0)
342     {
343         initQPIP = 1;
344     }
345 
346     initQPB = ((initQPIP + initQPIP) * 563 >> 10) + 1;
347     initQPB = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPB);
348 
349     if (gopSize > 300)  //if intra frame is not inserted frequently
350     {
351         initQPIP -= 8;
352         initQPB -= 8;
353     }
354     else
355     {
356         initQPIP -= 2;
357         initQPB -= 2;
358     }
359 
360     initQPIP = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPIP);
361     initQPB  = CodecHal_Clip3((int32_t)m_hevcPicParams->BRCMinQp, (int32_t)m_hevcPicParams->BRCMaxQp, initQPB);
362 }
363 
StoreHuCStatus2Register(PMOS_COMMAND_BUFFER cmdBuffer)364 MOS_STATUS CodechalVdencHevcState::StoreHuCStatus2Register(PMOS_COMMAND_BUFFER cmdBuffer)
365 {
366     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
367 
368     CODECHAL_ENCODE_FUNCTION_ENTER;
369 
370     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
371 
372     // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded
373     MHW_MI_STORE_DATA_PARAMS storeDataParams;
374     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
375     storeDataParams.pOsResource = &m_resHucStatus2Buffer;
376     storeDataParams.dwResourceOffset = 0;
377     storeDataParams.dwValue = m_hucInterface->GetHucStatus2ImemLoadedMask();
378     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
379 
380     // Store HUC_STATUS2 register
381     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
382     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
383     storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer;
384     storeRegParams.dwOffset = sizeof(uint32_t);
385     storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(m_vdboxIndex)->hucStatus2RegOffset;
386     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
387 
388     return eStatus;
389 }
390 
HuCBrcInitReset()391 MOS_STATUS CodechalVdencHevcState::HuCBrcInitReset()
392 {
393     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
394 
395     CODECHAL_ENCODE_FUNCTION_ENTER;
396 
397     MOS_COMMAND_BUFFER cmdBuffer;
398     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
399 
400     if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) )
401     {
402         // Send command buffer header at the beginning (OS dependent)
403         bool requestFrameTracking = m_singleTaskPhaseSupported ?
404             m_firstTaskInPhase : 0;
405         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
406     }
407 
408     // load kernel from WOPCM into L2 storage RAM
409     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
410     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
411     imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcInitKernelDescriptor;
412 
413     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
414 
415     // pipe mode select
416     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
417     pipeModeSelectParams.Mode = m_mode;
418     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
419 
420     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
421 
422     // set HuC DMEM param
423     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
424     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
425     dmemParams.presHucDataSource = &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx];
426     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
427     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
428     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
429 
430     MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
431     MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
432     virtualAddrParams.regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;
433     virtualAddrParams.regionParams[0].isWritable = true;
434     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
435 
436     CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcDummyStreamObject(&cmdBuffer));
437 
438     // Store HUC_STATUS2 register bit 6 before HUC_Start command
439     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
440     // (HUC_Start command with last start bit set).
441     CODECHAL_DEBUG_TOOL(
442         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
443     )
444 
445     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
446 
447     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
448 
449     // wait Huc completion (use HEVC bit for now)
450     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
451     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
452     vdPipeFlushParams.Flags.bFlushHEVC = 1;
453     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
454     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
455 
456     // Flush the engine to ensure memory written out
457     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
458     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
459     flushDwParams.bVideoPipelineCacheInvalidate = true;
460     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
461 
462     if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd))
463     {
464         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
465     }
466 
467     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
468 
469     if (!m_singleTaskPhaseSupported)
470     {
471         bool renderingFlags = m_videoContextUsesNullHw;
472 
473         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
474             &cmdBuffer,
475             CODECHAL_MEDIA_STATE_BRC_INIT_RESET,
476             "ENC")));
477 
478         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
479     }
480 
481     CODECHAL_DEBUG_TOOL(DumpHucBrcInit());
482     return eStatus;
483 }
484 
SetupBRCROIStreamIn(PMOS_RESOURCE streamIn,PMOS_RESOURCE deltaQpBuffer)485 MOS_STATUS CodechalVdencHevcState::SetupBRCROIStreamIn(PMOS_RESOURCE streamIn, PMOS_RESOURCE deltaQpBuffer)
486 {
487     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
488 
489     CODECHAL_ENCODE_FUNCTION_ENTER;
490 
491     CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
492     CODECHAL_ENCODE_CHK_NULL_RETURN(deltaQpBuffer);
493 
494     MOS_LOCK_PARAMS lockFlags;
495     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
496     lockFlags.WriteOnly = true;
497 
498     PDeltaQpForROI deltaQpData = (PDeltaQpForROI)m_osInterface->pfnLockResource(
499         m_osInterface,
500         deltaQpBuffer,
501         &lockFlags);
502     CODECHAL_ENCODE_CHK_NULL_RETURN(deltaQpData);
503 
504     MOS_ZeroMemory(deltaQpData, m_deltaQpRoiBufferSize);
505 
506     uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
507     uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
508     uint32_t deltaQpBufWidth = (MOS_ALIGN_CEIL(m_frameWidth, 32) / 32);
509     uint32_t deltaQpBufHeight = (MOS_ALIGN_CEIL(m_frameHeight, 32) / 32);
510     bool cu64Align = true;
511 
512     if ((m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
513          m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
514          m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR) &&
515          m_encodeParams.bMbQpDataEnabled)
516     {
517         cu64Align = false;
518 
519         MOS_LOCK_PARAMS LockFlagsReadOnly;
520         MOS_ZeroMemory(&LockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
521         LockFlagsReadOnly.ReadOnly = true;
522 
523         auto pInputDataGfx = (PDeltaQpForROI)m_osInterface->pfnLockResource(
524             m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource), &LockFlagsReadOnly);
525 
526         CODECHAL_ENCODE_CHK_NULL_RETURN(pInputDataGfx);
527 
528         for (uint32_t curY = 0; curY < deltaQpBufHeight; curY++)
529         {
530             for (uint32_t curX = 0; curX < deltaQpBufWidth; curX++)
531             {
532                 uint32_t iMB = curY * deltaQpBufHeight + curX;
533                 deltaQpData[iMB] = *(pInputDataGfx + m_encodeParams.psMbQpDataSurface->dwPitch * curY + curX);
534             }
535         }
536 
537         m_osInterface->pfnUnlockResource(
538         m_osInterface,
539         &(m_encodeParams.psMbQpDataSurface->OsResource));
540     }
541     else
542     {
543         for (auto i = m_hevcPicParams->NumROI - 1; i >= 0; i--)
544         {
545             //Check if the region is with in the borders
546             uint16_t top    = (uint16_t)CodecHal_Clip3(0, (deltaQpBufHeight - 1), m_hevcPicParams->ROI[i].Top);
547             uint16_t bottom = (uint16_t)CodecHal_Clip3(0, deltaQpBufHeight, m_hevcPicParams->ROI[i].Bottom);
548             uint16_t left   = (uint16_t)CodecHal_Clip3(0, (deltaQpBufWidth - 1), m_hevcPicParams->ROI[i].Left);
549             uint16_t right  = (uint16_t)CodecHal_Clip3(0, deltaQpBufWidth, m_hevcPicParams->ROI[i].Right);
550 
551             //Check if all the sides of ROI regions are aligned to 64CU
552             if ((top % 2 == 1) || (bottom % 2 == 1) || (left % 2 == 1) || (right % 2 == 1))
553             {
554                 cu64Align = false;
555             }
556 
557             SetBrcRoiDeltaQpMap(streamInWidth, top, bottom, left, right, (uint8_t)i, deltaQpData);
558         }
559     }
560 
561     m_osInterface->pfnUnlockResource(
562         m_osInterface,
563         deltaQpBuffer);
564 
565     uint8_t* data = (uint8_t*) m_osInterface->pfnLockResource(
566         m_osInterface,
567         streamIn,
568         &lockFlags);
569     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
570 
571     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
572     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
573     streaminDataParams.maxTuSize = 3;    //Maximum TU Size allowed, restriction to be set to 3
574     streaminDataParams.maxCuSize = (cu64Align) ? 3 : 2;
575     switch (m_hevcSeqParams->TargetUsage)
576     {
577     case 1:
578     case 4:
579         streaminDataParams.numMergeCandidateCu64x64 = 4;
580         streaminDataParams.numMergeCandidateCu32x32 = 3;
581         streaminDataParams.numMergeCandidateCu16x16 = 2;
582         streaminDataParams.numMergeCandidateCu8x8   = 1;
583         streaminDataParams.numImePredictors         = m_imgStateImePredictors;
584         break;
585     case 7:
586         streaminDataParams.numMergeCandidateCu64x64 = 2;
587         streaminDataParams.numMergeCandidateCu32x32 = 2;
588         streaminDataParams.numMergeCandidateCu16x16 = 2;
589         streaminDataParams.numMergeCandidateCu8x8   = 0;
590         streaminDataParams.numImePredictors         = 4;
591         break;
592     }
593 
594     int32_t streamInNumCUs = streamInWidth * streamInHeight;
595     for (auto i = 0; i < streamInNumCUs; i++)
596     {
597         SetStreaminDataPerLcu(&streaminDataParams, data+(i*64));
598     }
599 
600     m_osInterface->pfnUnlockResource(
601         m_osInterface,
602         streamIn);
603 
604     return eStatus;
605 }
606 
SetBrcRoiDeltaQpMap(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t regionId,PDeltaQpForROI deltaQpMap)607 void CodechalVdencHevcState::SetBrcRoiDeltaQpMap(
608     uint32_t streamInWidth,
609     uint32_t top,
610     uint32_t bottom,
611     uint32_t left,
612     uint32_t right,
613     uint8_t regionId,
614     PDeltaQpForROI deltaQpMap)
615 {
616     CODECHAL_ENCODE_FUNCTION_ENTER;
617 
618     for (auto y = top; y < bottom; y++)
619     {
620         for (auto x = left; x < right; x++)
621         {
622             uint32_t offset = 0, xyOffset = 0;
623             StreaminZigZagToLinearMap(streamInWidth, x, y, &offset, &xyOffset);
624 
625             (deltaQpMap + (offset + xyOffset))->iDeltaQp = m_hevcPicParams->ROI[regionId].PriorityLevelOrDQp;
626         }
627     }
628 }
629 
ProcessRoiDeltaQp()630 void CodechalVdencHevcState::ProcessRoiDeltaQp()
631 {
632     CODECHAL_ENCODE_FUNCTION_ENTER;
633 
634     // Intialize ROIDistinctDeltaQp to be min expected delta qp, setting to -128
635     // Check if forceQp is needed or not
636     // forceQp is enabled if there are greater than 3 distinct delta qps or if the deltaqp is beyond range (-8, 7)
637 
638     for (auto k = 0; k < m_maxNumROI; k++)
639     {
640         m_hevcPicParams->ROIDistinctDeltaQp[k] = -128;
641     }
642 
643     int32_t numQp = 0;
644     for (int32_t i = 0; i < m_hevcPicParams->NumROI; i++)
645     {
646         bool dqpNew = true;
647 
648         //Get distinct delta Qps among all ROI regions, index 0 having the lowest delta qp
649         int32_t k = numQp - 1;
650         for (; k >= 0; k--)
651         {
652             if (m_hevcPicParams->ROI[i].PriorityLevelOrDQp == m_hevcPicParams->ROIDistinctDeltaQp[k] || m_hevcPicParams->ROI[i].PriorityLevelOrDQp == 0)
653             {
654                 dqpNew = false;
655                 break;
656             }
657             else if (m_hevcPicParams->ROI[i].PriorityLevelOrDQp < m_hevcPicParams->ROIDistinctDeltaQp[k])
658             {
659                 continue;
660             }
661             else
662             {
663                 break;
664             }
665         }
666 
667         if (dqpNew)
668         {
669             for (int32_t j = numQp - 1; (j >= k + 1 && j >= 0); j--)
670             {
671                 m_hevcPicParams->ROIDistinctDeltaQp[j + 1] = m_hevcPicParams->ROIDistinctDeltaQp[j];
672             }
673             m_hevcPicParams->ROIDistinctDeltaQp[k + 1] = m_hevcPicParams->ROI[i].PriorityLevelOrDQp;
674             numQp++;
675         }
676     }
677 
678     //Set the ROI DeltaQp to zero for remaining array elements
679     for (auto k = numQp; k < m_maxNumROI; k++)
680     {
681         m_hevcPicParams->ROIDistinctDeltaQp[k] = 0;
682     }
683 
684     m_vdencNativeROIEnabled = !(numQp > m_maxNumNativeROI || m_hevcPicParams->ROIDistinctDeltaQp[0] < -8 || m_hevcPicParams->ROIDistinctDeltaQp[numQp - 1] > 7);
685 }
686 
SetupROIStreamIn(PMOS_RESOURCE streamIn)687 MOS_STATUS CodechalVdencHevcState::SetupROIStreamIn(PMOS_RESOURCE streamIn)
688 {
689     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
690 
691     CODECHAL_ENCODE_FUNCTION_ENTER;
692 
693     CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
694 
695     MOS_LOCK_PARAMS lockFlags;
696     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
697     lockFlags.WriteOnly = true;
698 
699     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
700         m_osInterface,
701         streamIn,
702         &lockFlags);
703     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
704 
705     uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
706     uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
707     int32_t streamInNumCUs = streamInWidth * streamInHeight;
708 
709     MOS_ZeroMemory(data, streamInNumCUs * 64);
710 
711     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
712 
713     //ROI higher priority for smaller index.
714     bool cu64Align = true;
715     for (int32_t i = m_hevcPicParams->NumROI - 1; i >= 0; i--)
716     {
717 
718         //Check if the region is with in the borders
719         uint16_t top    = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->ROI[i].Top);
720         uint16_t bottom = (uint16_t)CodecHal_Clip3(0, streamInHeight, m_hevcPicParams->ROI[i].Bottom);
721         uint16_t left   = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->ROI[i].Left);
722         uint16_t right  = (uint16_t)CodecHal_Clip3(0, streamInWidth, m_hevcPicParams->ROI[i].Right);
723 
724         //Check if all the sides of ROI regions are aligned to 64CU
725         if ((top % 2 == 1) || (bottom % 2 == 1) || (left % 2 == 1) || (right % 2 == 1))
726         {
727             cu64Align = false;
728         }
729 
730         // For native ROI, determine Region ID based on distinct delta Qps and set ROI control
731         uint32_t roiCtrl = 0;
732         for (auto j = 0; j < m_maxNumNativeROI; j++)
733         {
734             if (m_hevcPicParams->ROIDistinctDeltaQp[j] == m_hevcPicParams->ROI[i].PriorityLevelOrDQp)
735             {
736                 //All four 16x16 blocks within the 32x32 blocks should share the same region ID j
737                 roiCtrl = j + 1;
738                 for (auto k = 0; k < 3; k++)
739                 {
740                     roiCtrl = roiCtrl << 2;
741                     roiCtrl = roiCtrl + j + 1;
742                 }
743                 break;
744             }
745         }
746         // Calculate ForceQp
747         int8_t forceQp = (int8_t)CodecHal_Clip3(10, 51, m_hevcPicParams->QpY + m_hevcPicParams->ROI[i].PriorityLevelOrDQp + m_hevcSliceParams->slice_qp_delta);
748 
749         MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
750         streaminDataParams.setQpRoiCtrl = true;
751         if (m_vdencNativeROIEnabled)
752         {
753             streaminDataParams.roiCtrl = (uint8_t)roiCtrl;
754         }
755         else
756         {
757             streaminDataParams.forceQp[0] = forceQp;
758             streaminDataParams.forceQp[1] = forceQp;
759             streaminDataParams.forceQp[2] = forceQp;
760             streaminDataParams.forceQp[3] = forceQp;
761         }
762 
763         SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, data);
764     }
765 
766     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
767     streaminDataParams.maxTuSize = 3;    //Maximum TU Size allowed, restriction to be set to 3
768     streaminDataParams.maxCuSize = (cu64Align) ? 3 : 2;
769     switch (m_hevcSeqParams->TargetUsage)
770     {
771     case 1:
772     case 4:
773         streaminDataParams.numMergeCandidateCu64x64 = 4;
774         streaminDataParams.numMergeCandidateCu32x32 = 3;
775         streaminDataParams.numMergeCandidateCu16x16 = 2;
776         streaminDataParams.numMergeCandidateCu8x8   = 1;
777         streaminDataParams.numImePredictors         = m_imgStateImePredictors;
778         break;
779     case 7:
780         streaminDataParams.numMergeCandidateCu64x64 = 2;
781         streaminDataParams.numMergeCandidateCu32x32 = 2;
782         streaminDataParams.numMergeCandidateCu16x16 = 2;
783         streaminDataParams.numMergeCandidateCu8x8   = 0;
784         streaminDataParams.numImePredictors         = 4;
785         break;
786     }
787 
788     for (auto i = 0; i < streamInNumCUs; i++)
789     {
790         SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
791     }
792 
793     m_osInterface->pfnUnlockResource(
794         m_osInterface,
795         streamIn);
796 
797     return eStatus;
798 }
SetupMbQpStreamIn(PMOS_RESOURCE streamIn)799 MOS_STATUS CodechalVdencHevcState::SetupMbQpStreamIn(PMOS_RESOURCE streamIn)
800 {
801     CODECHAL_ENCODE_FUNCTION_ENTER;
802 
803     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
804     CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
805 
806     MOS_LOCK_PARAMS LockFlags;
807     MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS));
808     LockFlags.WriteOnly = true;
809 
810     auto dataGfx = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface,
811                                                          streamIn,
812                                                          &LockFlags);
813     CODECHAL_ENCODE_CHK_NULL_RETURN(dataGfx);
814     MOS_SURFACE surfInfo = {};
815     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(m_osInterface, streamIn, &surfInfo));
816 
817     uint32_t uiSize = surfInfo.dwSize;
818     uint32_t uiAlign = 64;
819     auto data = (uint8_t*)MOS_AllocMemory(uiSize + uiAlign);
820     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
821 
822     auto dataBase = (uint8_t*)((((uint64_t)(data) + uiAlign - 1) / uiAlign) * uiAlign);
823 
824     MOS_SecureMemcpy(dataBase, uiSize, dataGfx, uiSize);
825 
826     uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
827     uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
828     int32_t streamInNumCUs = streamInWidth * streamInHeight;
829 
830     MOS_LOCK_PARAMS LockFlagsReadOnly;
831     MOS_ZeroMemory(&LockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
832     LockFlagsReadOnly.ReadOnly = true;
833 
834     auto pInputDataGfx = (uint8_t*)m_osInterface->pfnLockResource(
835                                                             m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource),
836                                                             &LockFlagsReadOnly);
837     if (pInputDataGfx == nullptr)
838     {
839         MOS_SafeFreeMemory(data);
840         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer from LockResource!");
841         return MOS_STATUS_NULL_POINTER;
842     }
843 
844     eStatus = m_osInterface->pfnGetResourceInfo(
845                                         m_osInterface, &(m_encodeParams.psMbQpDataSurface->OsResource),
846                                         &surfInfo);
847     if (eStatus != MOS_STATUS_SUCCESS)
848     {
849         MOS_SafeFreeMemory(data);
850         CODECHAL_ENCODE_ASSERTMESSAGE("Get psMbQpDataSurface ResourceInfo Failed!");
851         return eStatus;
852     }
853 
854     auto pInputData = (int8_t*)MOS_AllocMemory(surfInfo.dwSize);
855     if (pInputData == nullptr)
856     {
857         MOS_SafeFreeMemory(data);
858         CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer from MOS_AllocMemory!");
859         return MOS_STATUS_NULL_POINTER;
860     }
861 
862     MOS_SecureMemcpy(pInputData, surfInfo.dwSize, pInputDataGfx, surfInfo.dwSize);
863 
864     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams = {};
865 
866     for (uint32_t h = 0; h < streamInHeight; h++)
867     {
868         for (uint32_t w = 0; w < streamInWidth; w++)
869         {
870             //Calculate X Y Offset for the zig zag scan with in each 64x64 LCU
871             //dwOffset gives the 64 LCU row
872             //            uint32_t Offset = StreamInWidth * (h/2) * 2;
873             //            uint32_t YOffset = (h % 2) * 2;
874             //            uint32_t XOffset = 2 * (w/2 * 2) + w % 2;
875 
876             //            (pData + (Offset + XOffset + YOffset))->DW7.QpEnable = 0xf;
877             //            (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_0 = ForceQp;
878             //            (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_1 = ForceQp;
879             //            (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_2 = ForceQp;
880             //            (pData + (Offset + XOffset + YOffset))->DW14.ForceQp_3 = ForceQp;
881             streaminDataParams.setQpRoiCtrl = true;
882 
883             streaminDataParams.forceQp[0] = (int8_t) ( pInputData[(h * 2)     * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2)]);
884             streaminDataParams.forceQp[1] = (int8_t) ( pInputData[(h * 2)     * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2 + 1)]);
885             streaminDataParams.forceQp[2] = (int8_t) ( pInputData[(h * 2 + 1) * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2)]);
886             streaminDataParams.forceQp[3] = (int8_t) ( pInputData[(h * 2 + 1) * m_encodeParams.psMbQpDataSurface->dwPitch + (w * 2 + 1)]);
887 
888             SetStreaminDataPerRegion(streamInWidth, h, h+1, w, w+1, &streaminDataParams, dataBase);
889 
890         }
891     }
892 
893     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
894     streaminDataParams.maxTuSize = 3;    //Maximum TU Size allowed, restriction to be set to 3
895     streaminDataParams.maxCuSize = 3;
896     switch (m_hevcSeqParams->TargetUsage)
897     {
898         case 1:
899         case 4:
900             streaminDataParams.numMergeCandidateCu64x64 = 4;
901             streaminDataParams.numMergeCandidateCu32x32 = 3;
902             streaminDataParams.numMergeCandidateCu16x16 = 2;
903             streaminDataParams.numMergeCandidateCu8x8   = 1;
904             streaminDataParams.numImePredictors         = m_imgStateImePredictors;
905             break;
906         case 7:
907             streaminDataParams.numMergeCandidateCu64x64 = 2;
908             streaminDataParams.numMergeCandidateCu32x32 = 2;
909             streaminDataParams.numMergeCandidateCu16x16 = 2;
910             streaminDataParams.numMergeCandidateCu8x8   = 0;
911             streaminDataParams.numImePredictors         = 4;
912             break;
913     }
914 
915     for (auto i = 0; i < streamInNumCUs; i++)
916     {
917         SetStreaminDataPerLcu(&streaminDataParams, dataBase + (i * 64));
918     }
919 
920     MOS_SecureMemcpy(dataGfx, uiSize, dataBase, uiSize);
921     MOS_SafeFreeMemory(data);
922     MOS_SafeFreeMemory(pInputData);
923 
924     m_osInterface->pfnUnlockResource(
925                                     m_osInterface,
926                                     &(m_encodeParams.psMbQpDataSurface->OsResource));
927     m_osInterface->pfnUnlockResource(
928                                     m_osInterface,
929                                     streamIn);
930     return eStatus;
931 }
932 
StreaminSetDirtyRectRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,uint8_t maxcu,void * streaminData)933 void CodechalVdencHevcState::StreaminSetDirtyRectRegion(
934     uint32_t streamInWidth,
935     uint32_t top,
936     uint32_t bottom,
937     uint32_t left,
938     uint32_t right,
939     uint8_t  maxcu,
940     void* streaminData)
941 {
942     CODECHAL_ENCODE_FUNCTION_ENTER;
943 
944     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
945     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
946     streaminDataParams.maxTuSize = 3;
947     streaminDataParams.maxCuSize = maxcu;
948     streaminDataParams.puTypeCtrl = 0;
949 
950     switch (m_hevcSeqParams->TargetUsage)
951     {
952     case 1:
953     case 4:
954         streaminDataParams.numMergeCandidateCu64x64 = 4;
955         streaminDataParams.numMergeCandidateCu32x32 = 3;
956         streaminDataParams.numMergeCandidateCu16x16 = 2;
957         streaminDataParams.numMergeCandidateCu8x8 = 1;
958         streaminDataParams.numImePredictors = m_imgStateImePredictors;
959         break;
960     case 7:
961         streaminDataParams.numMergeCandidateCu64x64 = 2;
962         streaminDataParams.numMergeCandidateCu32x32 = 2;
963         streaminDataParams.numMergeCandidateCu16x16 = 2;
964         streaminDataParams.numMergeCandidateCu8x8 = 0;
965         streaminDataParams.numImePredictors = 4;
966         break;
967     }
968 
969     SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, streaminData);
970 }
971 
SetStreaminDataPerRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,void * streaminData)972 void CodechalVdencHevcState::SetStreaminDataPerRegion(
973     uint32_t streamInWidth,
974     uint32_t top,
975     uint32_t bottom,
976     uint32_t left,
977     uint32_t right,
978     PMHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminParams,
979     void* streaminData)
980 {
981     uint8_t* data = (uint8_t*)streaminData;
982 
983     for (auto y = top; y < bottom; y++)
984     {
985         for (auto x = left; x < right; x++)
986         {
987             //Calculate X Y for the zig zag scan
988             uint32_t offset = 0, xyOffset = 0;
989             StreaminZigZagToLinearMap(streamInWidth, x, y, &offset, &xyOffset);
990 
991             SetStreaminDataPerLcu(streaminParams, data + (offset + xyOffset) * 64);
992         }
993     }
994 }
995 
StreaminZigZagToLinearMap(uint32_t streamInWidth,uint32_t x,uint32_t y,uint32_t * offset,uint32_t * xyOffset)996 void CodechalVdencHevcState::StreaminZigZagToLinearMap(
997     uint32_t streamInWidth,
998     uint32_t x,
999     uint32_t y,
1000     uint32_t* offset,
1001     uint32_t* xyOffset)
1002 {
1003     CODECHAL_ENCODE_FUNCTION_ENTER;
1004 
1005     *offset = streamInWidth * y;
1006     uint32_t yOffset = 0;
1007     uint32_t xOffset = 2 * x;
1008 
1009     //Calculate X Y Offset for the zig zag scan with in each 64x64 LCU
1010     //dwOffset gives the 64 LCU row
1011     if (y % 2)
1012     {
1013         *offset = streamInWidth * (y - 1);
1014         yOffset = 2;
1015     }
1016 
1017     if (x % 2)
1018     {
1019         xOffset = (2 * x) - 1;
1020     }
1021 
1022     *xyOffset = xOffset + yOffset;
1023 }
1024 
StreaminSetBorderNon64AlignStaticRegion(uint32_t streamInWidth,uint32_t top,uint32_t bottom,uint32_t left,uint32_t right,void * streaminData)1025 void CodechalVdencHevcState::StreaminSetBorderNon64AlignStaticRegion(
1026     uint32_t streamInWidth,
1027     uint32_t top,
1028     uint32_t bottom,
1029     uint32_t left,
1030     uint32_t right,
1031     void* streaminData)
1032 {
1033     CODECHAL_ENCODE_FUNCTION_ENTER;
1034 
1035     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
1036     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
1037     streaminDataParams.maxTuSize = 3;
1038     streaminDataParams.maxCuSize = 2;
1039     streaminDataParams.numMergeCandidateCu64x64 = 0; // MergeCand setting for Force MV
1040     streaminDataParams.numMergeCandidateCu32x32 = 1; // this is always set to 1
1041     streaminDataParams.numMergeCandidateCu16x16 = 0;
1042     streaminDataParams.numMergeCandidateCu8x8 = 0;
1043     streaminDataParams.numImePredictors = 0;
1044     streaminDataParams.puTypeCtrl = 0xff; //Force MV
1045 
1046     SetStreaminDataPerRegion(streamInWidth, top, bottom, left, right, &streaminDataParams, streaminData);
1047 }
1048 
//!
//! \brief    Fill the VDEnc stream-in surface for dirty-rectangle encoding
//! \details  Works in three passes, where later writes override earlier ones:
//!           1. every entry gets the default "static" Force MV settings;
//!           2. the right/bottom frame borders are rewritten when the frame size
//!              is not a multiple of 32 or of 64;
//!           3. each dirty rectangle, from the last index down to index 0, is
//!              written with dirty-rect settings. Rows/columns of a rectangle
//!              that fall on an odd position get maxcu 2, and the entry row or
//!              column just outside that edge gets the border static settings.
//!           All coordinates are in stream-in entry units (32 pixels, per the
//!           width/height computation below).
//! \param    [in] streamIn
//!           Stream-in resource to populate
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else the status produced by the
//!           null-check macros
//!
MOS_STATUS CodechalVdencHevcState::SetupDirtyRectStreamIn(PMOS_RESOURCE streamIn)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);

    MOS_LOCK_PARAMS lockFlags;
    MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
    lockFlags.WriteOnly = true;

    uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
        m_osInterface,
        streamIn,
        &lockFlags);
    CODECHAL_ENCODE_CHK_NULL_RETURN(data);

    // Surface dimensions in 32-pixel units, with the frame padded to a multiple of 64
    uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
    uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
    int32_t streamInNumCUs = streamInWidth * streamInHeight;

    // Each entry occupies 64 bytes
    MOS_ZeroMemory(data, streamInNumCUs * 64);

    // Pass 1: default settings for the whole surface
    MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
    MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
    streaminDataParams.maxTuSize = 3;
    streaminDataParams.maxCuSize = 3;
    streaminDataParams.numImePredictors = 0;
    streaminDataParams.puTypeCtrl = 0xff; //Force MV
    streaminDataParams.numMergeCandidateCu64x64 = 1; // MergeCand setting for Force MV
    streaminDataParams.numMergeCandidateCu32x32 = 0; // 0 here; StreaminSetBorderNon64AlignStaticRegion uses 1 (with maxCuSize 2)
    streaminDataParams.numMergeCandidateCu16x16 = 0;
    streaminDataParams.numMergeCandidateCu8x8 = 0;

    for (auto i = 0; i < streamInNumCUs; i++)
    {
        SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
    }

    // Pass 2: frame borders.
    // Dimensions in 32-pixel units when the frame is only padded to a multiple of 32
    uint32_t streamInWidthNo64Align  = (MOS_ALIGN_CEIL(m_frameWidth, 32) / 32);
    uint32_t streamInHeightNo64Align = (MOS_ALIGN_CEIL(m_frameHeight, 32) / 32);

    bool bActualWidth32Align  = (m_frameWidth % 32) == 0;
    bool bActualHeight32Align = (m_frameHeight % 32) == 0;

    // Set the static region when the width is not 64 CU aligned.
    if (streamInWidthNo64Align != streamInWidth || !bActualWidth32Align)
    {
        // Columns from the last one touched by the frame up to the surface edge, full height
        auto border_top    = 0;
        auto border_bottom = streamInHeight;
        auto border_left   = streamInWidthNo64Align - 1;
        auto border_right  = streamInWidth;

        if (!bActualWidth32Align)
        {
            // Width is not a multiple of 32: the border columns get dirty-rect settings
            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 3, data);
            if (streamInWidthNo64Align == streamInWidth)
            {
                // No padding column exists, so the static setting goes one column to the left instead
                StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left-1, border_right-1, data);
            }
        }
        else
        {
            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
        }
    }

    // Set the static region when the height is not 64 CU aligned.
    if (streamInHeightNo64Align != streamInHeight || !bActualHeight32Align)
    {
        // Rows from the last one touched by the frame down to the surface edge, full width
        auto border_top    = streamInHeightNo64Align - 1;
        auto border_bottom = streamInHeight;
        auto border_left   = 0;
        auto border_right  = streamInWidth;

        if (!bActualHeight32Align)
        {
            // Height is not a multiple of 32: the border rows get dirty-rect settings
            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 3, data);
            if (streamInHeightNo64Align == streamInHeight)
            {
                // No padding row exists, so the static setting goes one row up instead
                StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top - 1, border_bottom - 1, border_left, border_right, data);
            }
        }
        else
        {
            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
        }
    }

    // Pass 3: dirty rectangles, last index first so that index 0 is written last
    for (int i = m_hevcPicParams->NumDirtyRects - 1; i >= 0; i--)
    {
        //Check if the region is with in the borders
        // Bottom/Right are clamped as inclusive coordinates and then made exclusive with + 1
        uint16_t top    = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->pDirtyRect[i].Top);
        uint16_t bottom = (uint16_t)CodecHal_Clip3(0, (streamInHeight - 1), m_hevcPicParams->pDirtyRect[i].Bottom) + 1;
        uint16_t left   = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->pDirtyRect[i].Left);
        uint16_t right  = (uint16_t)CodecHal_Clip3(0, (streamInWidth - 1), m_hevcPicParams->pDirtyRect[i].Right) + 1;

        // Interior of the rectangle; shrunk below by one for every odd (non 64-aligned) edge
        auto dirtyrect_top = top;
        auto dirtyrect_bottom = bottom;
        auto dirtyrect_left = left;
        auto dirtyrect_right = right;

        //If the border of the DirtyRect is not aligned with 64 CU, different setting in the border
        if (top % 2 != 0)
        {
            // The odd top row of the rectangle: dirty-rect settings with maxcu 2
            auto border_top = top;
            auto border_bottom = top + 1;
            auto border_left = left;
            auto border_right = right;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // The row just above, widened to even column boundaries: border static settings
            border_top = top - 1;
            border_bottom = top;
            border_left = (left % 2 != 0) ? left - 1 : left;
            border_right = (right % 2 != 0) ? right + 1 : right;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);

            dirtyrect_top = top + 1;
        }

        if (bottom % 2 != 0)
        {
            // The last row of the rectangle: dirty-rect settings with maxcu 2
            auto border_top = bottom - 1;
            auto border_bottom = bottom;
            auto border_left = left;
            auto border_right = right;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // The row just below, widened to even column boundaries: border static settings
            border_top = bottom;
            border_bottom = bottom + 1;
            border_left = (left % 2 != 0) ? left - 1 : left;
            border_right = (right % 2 != 0) ? right + 1 : right;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);

            dirtyrect_bottom = bottom - 1;
        }

        if (left % 2 != 0)
        {
            // The odd left column of the rectangle: dirty-rect settings with maxcu 2
            auto border_top = top;
            auto border_bottom = bottom;
            auto border_left = left;
            auto border_right = left + 1;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // The column just to the left, extended to even row boundaries: border static settings
            border_top = (top % 2 != 0) ? top - 1 : top;
            border_bottom = (bottom % 2 != 0) ? bottom + 1 : bottom;
            border_left = left - 1;
            border_right = left;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);

            dirtyrect_left = left + 1;
        }

        if (right % 2 != 0)
        {
            // The last column of the rectangle: dirty-rect settings with maxcu 2
            auto border_top = top;
            auto border_bottom = bottom;
            auto border_left = right - 1;
            auto border_right = right;

            StreaminSetDirtyRectRegion(streamInWidth, border_top, border_bottom, border_left, border_right, 2, data);

            // The column just to the right, extended to even row boundaries: border static settings
            border_top = (top % 2 != 0) ? top - 1 : top;
            border_bottom = (bottom % 2 != 0) ? bottom + 1 : bottom;
            border_left = right;
            border_right = right + 1;

            StreaminSetBorderNon64AlignStaticRegion(streamInWidth, border_top, border_bottom, border_left, border_right, data);
            dirtyrect_right = right - 1;
        }

        // Remaining interior of the rectangle: dirty-rect settings with maxcu 3
        StreaminSetDirtyRectRegion(streamInWidth, dirtyrect_top, dirtyrect_bottom, dirtyrect_left, dirtyrect_right, 3, data);
    }

    m_osInterface->pfnUnlockResource(
        m_osInterface,
        streamIn);

    return eStatus;
}
1237 
SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)1238 MOS_STATUS CodechalVdencHevcState::SetRegionsHuCBrcUpdate(PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams)
1239 {
1240     CODECHAL_ENCODE_FUNCTION_ENTER;
1241 
1242     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1243 
1244     int32_t currentPass = GetCurrentPass();
1245     if (currentPass < 0)
1246     {
1247         eStatus = MOS_STATUS_INVALID_PARAMETER;
1248         return eStatus;
1249     }
1250     // Add Virtual addr
1251     MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
1252     virtualAddrParams->regionParams[0].presRegion = &m_vdencBrcHistoryBuffer;                // Region 0 - History Buffer (Input/Output)
1253     virtualAddrParams->regionParams[0].isWritable = true;
1254     virtualAddrParams->regionParams[1].presRegion =
1255         (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);                     // Region 1  VDEnc Statistics Buffer (Input) - VDENC_HEVC_VP9_FRAME_BASED_STATISTICS_STREAMOUT
1256     virtualAddrParams->regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;          // Region 2  PAK Statistics Buffer (Input) - MFX_PAK_FRAME_STATISTICS
1257     virtualAddrParams->regionParams[3].presRegion = &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass];    // Region 3 - Input SLB Buffer (Input)
1258     virtualAddrParams->regionParams[4].presRegion = &m_vdencBrcConstDataBuffer[m_currRecycledBufIdx];              // Region 4 - Constant Data (Input)
1259     virtualAddrParams->regionParams[5].presRegion = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource;  // Region 5 - Output SLB Buffer (Output)
1260     virtualAddrParams->regionParams[5].isWritable = true;
1261     virtualAddrParams->regionParams[6].presRegion = &m_dataFromPicsBuffer;                   // Region 6 - Data Buffer of Current and Reference Pictures for Weighted Prediction (Input/Output)
1262     virtualAddrParams->regionParams[6].isWritable = true;
1263     virtualAddrParams->regionParams[7].presRegion = &m_resLcuBaseAddressBuffer;  // Region 7  Slice Stat Streamout (Input)
1264     virtualAddrParams->regionParams[8].presRegion =
1265         (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);                        // Region 8 - PAK Information (Input)
1266     virtualAddrParams->regionParams[9].presRegion = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];          // Region 9 - Streamin Buffer for ROI (Input)
1267     virtualAddrParams->regionParams[10].presRegion = &m_vdencDeltaQpBuffer[m_currRecycledBufIdx];                  // Region 10 - Delta QP Buffer for ROI (Input)
1268     virtualAddrParams->regionParams[11].presRegion = &m_vdencOutputROIStreaminBuffer;        // Region 11 - Streamin Buffer for ROI (Output)
1269     virtualAddrParams->regionParams[11].isWritable = true;
1270 
1271     // region 15 always in clear
1272     virtualAddrParams->regionParams[15].presRegion = &m_vdencBrcDbgBuffer;                   // Region 15 - Debug Buffer (Output)
1273     virtualAddrParams->regionParams[15].isWritable = true;
1274 
1275     return eStatus;
1276 }
1277 
HuCBrcUpdate()1278 MOS_STATUS CodechalVdencHevcState::HuCBrcUpdate()
1279 {
1280     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1281 
1282     CODECHAL_ENCODE_FUNCTION_ENTER;
1283 
1284     MOS_COMMAND_BUFFER cmdBuffer;
1285     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
1286 
1287     if (!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit))
1288     {
1289         // Send command buffer header at the beginning (OS dependent)
1290         bool requestFrameTracking = m_singleTaskPhaseSupported ?
1291             m_firstTaskInPhase : 0;
1292         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
1293     }
1294 
1295     int32_t currentPass = GetCurrentPass();
1296     if (currentPass < 0)
1297     {
1298         eStatus = MOS_STATUS_INVALID_PARAMETER;
1299         return eStatus;
1300     }
1301     CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructBatchBufferHuCBRC(&m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
1302 
1303     // load kernel from WOPCM into L2 storage RAM
1304     MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
1305     MOS_ZeroMemory(&imemParams, sizeof(imemParams));
1306 
1307     if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW)  // Low Delay BRC
1308     {
1309         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcLowdelayKernelDescriptor;
1310     }
1311     else
1312     {
1313         imemParams.dwKernelDescriptor = m_vdboxHucHevcBrcUpdateKernelDescriptor;
1314     }
1315 
1316     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
1317 
1318     // pipe mode select
1319     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
1320     pipeModeSelectParams.Mode = m_mode;
1321     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
1322 
1323     // DMEM set
1324     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
1325 
1326     MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
1327     MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
1328     dmemParams.presHucDataSource = &(m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass]);
1329     dmemParams.dwDataLength = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
1330     dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
1331 
1332     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
1333 
1334     // Set Const Data buffer
1335     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetConstDataHuCBrcUpdate());
1336 
1337     // Add Virtual addr
1338     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCBrcUpdate(&m_virtualAddrParams));
1339     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &m_virtualAddrParams));
1340 
1341     CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcDummyStreamObject(&cmdBuffer));
1342 
1343     // Store HUC_STATUS2 register bit 6 before HUC_Start command
1344     // BitField: VALID IMEM LOADED - This bit will be cleared by HW at the end of a HUC workload
1345     // (HUC_Start command with last start bit set).
1346     CODECHAL_DEBUG_TOOL(
1347         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
1348     )
1349 
1350     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Report(&cmdBuffer));
1351 
1352     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
1353 
1354     // wait Huc completion (use HEVC bit for now)
1355     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
1356     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
1357     vdPipeFlushParams.Flags.bFlushHEVC = 1;
1358     vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
1359     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
1360 
1361     // Flush the engine to ensure memory written out
1362     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
1363     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
1364     flushDwParams.bVideoPipelineCacheInvalidate = true;
1365     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
1366 
1367     // Write HUC_STATUS mask: DW1 (mask value)
1368     MHW_MI_STORE_DATA_PARAMS storeDataParams;
1369     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
1370     storeDataParams.pOsResource = &m_resPakMmioBuffer;
1371     storeDataParams.dwResourceOffset = sizeof(uint32_t);
1372     storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_REENCODE_MASK;
1373     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
1374 
1375     // store HUC_STATUS register: DW0 (actual value)
1376     CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
1377     auto mmioRegisters = m_hucInterface->GetMmioRegisters(m_vdboxIndex);
1378     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
1379     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
1380     storeRegParams.presStoreBuffer = &m_resPakMmioBuffer;
1381     storeRegParams.dwOffset = 0;
1382     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
1383     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
1384 
    // DW0 & DW1 will be considered together for the conditional batch buffer end cmd later
1386     if ((!m_singleTaskPhaseSupported) && (m_osInterface->bNoParsingAssistanceInKmd))
1387     {
1388         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
1389     }
1390 
1391     // HuC Input
1392     CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(true));
1393 
1394     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
1395 
1396     if (!m_singleTaskPhaseSupported)
1397     {
1398         bool renderingFlags = m_videoContextUsesNullHw;
1399 
1400         CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
1401             &cmdBuffer,
1402             CODECHAL_MEDIA_STATE_BRC_UPDATE,
1403             "ENC")));
1404 
1405         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
1406     }
1407 
1408     CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
1409 
1410     return eStatus;
1411 }
1412 
HuCBrcDummyStreamObject(PMOS_COMMAND_BUFFER cmdBuffer)1413 MOS_STATUS CodechalVdencHevcState::HuCBrcDummyStreamObject(PMOS_COMMAND_BUFFER cmdBuffer)
1414 {
1415     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1416 
1417     CODECHAL_ENCODE_FUNCTION_ENTER;
1418 
1419     // pass dummy buffer by Ind Obj Addr command
1420     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjParams;
1421     MOS_ZeroMemory(&indObjParams, sizeof(indObjParams));
1422     indObjParams.presDataBuffer = &m_vdencBrcDbgBuffer;
1423     indObjParams.dwDataSize = 1;
1424     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucIndObjBaseAddrStateCmd(cmdBuffer, &indObjParams));
1425 
1426     MHW_VDBOX_HUC_STREAM_OBJ_PARAMS streamObjParams;
1427     MOS_ZeroMemory(&streamObjParams, sizeof(streamObjParams));
1428     streamObjParams.dwIndStreamInLength = 1;
1429     streamObjParams.bHucProcessing = true;
1430     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStreamObjectCmd(cmdBuffer, &streamObjParams));
1431 
1432     return eStatus;
1433 }
1434 
SetVdencPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS & pipeModeSelectParams)1435 void CodechalVdencHevcState::SetVdencPipeModeSelectParams(
1436     MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
1437 {
1438     pipeModeSelectParams.ucVdencBitDepthMinus8    = m_hevcSeqParams->bit_depth_luma_minus8;
1439     pipeModeSelectParams.bPakThresholdCheckEnable = m_hevcSeqParams->SliceSizeControl;
1440     pipeModeSelectParams.ChromaType               = m_hevcSeqParams->chroma_format_idc;
1441     pipeModeSelectParams.bTlbPrefetchEnable = true;
1442     pipeModeSelectParams.Format = m_rawSurfaceToPak->Format;
1443 
1444     // can be enabled by reg key (disabled by default)
1445     pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = m_vdencPakObjCmdStreamOutEnabled;
1446 
1447     int32_t currentPass = GetCurrentPass();
1448 
1449     // needs to be enabled for 1st pass in multi-pass case
1450     // This bit is ignored if PAK only second pass is enabled.
1451     if ((currentPass == 0) && (currentPass != m_numPasses))
1452     {
1453         pipeModeSelectParams.bVdencPakObjCmdStreamOutEnable = true;
1454     }
1455 }
1456 
SetVdencSurfaceStateParams(MHW_VDBOX_SURFACE_PARAMS & srcSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & reconSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & ds8xSurfaceParams,MHW_VDBOX_SURFACE_PARAMS & ds4xSurfaceParams)1457 void CodechalVdencHevcState::SetVdencSurfaceStateParams(
1458     MHW_VDBOX_SURFACE_PARAMS& srcSurfaceParams,
1459     MHW_VDBOX_SURFACE_PARAMS& reconSurfaceParams,
1460     MHW_VDBOX_SURFACE_PARAMS& ds8xSurfaceParams,
1461     MHW_VDBOX_SURFACE_PARAMS& ds4xSurfaceParams)
1462 {
1463     // VDENC_SRC_SURFACE_STATE parameters
1464     srcSurfaceParams.dwActualWidth        = ((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
1465     srcSurfaceParams.dwActualHeight       = ((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1) << (m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
1466     srcSurfaceParams.bColorSpaceSelection = (m_hevcSeqParams->InputColorSpace == ECOLORSPACE_P709) ? 1 : 0;
1467 
1468     // VDENC_REF_SURFACE_STATE parameters
1469     reconSurfaceParams.dwActualWidth = srcSurfaceParams.dwActualWidth;
1470     reconSurfaceParams.dwActualHeight = srcSurfaceParams.dwActualHeight;
1471     reconSurfaceParams.dwReconSurfHeight = m_rawSurfaceToPak->dwHeight;
1472 
1473     // VDENC_DS_REF_SURFACE_STATE parameters
1474     MOS_ZeroMemory(&ds8xSurfaceParams, sizeof(ds8xSurfaceParams));
1475     ds8xSurfaceParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1476     ds8xSurfaceParams.psSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1477     ds8xSurfaceParams.ucSurfaceStateId = CODECHAL_MFX_DSRECON_SURFACE_ID;
1478     ds8xSurfaceParams.dwActualWidth = ds8xSurfaceParams.psSurface->dwWidth;
1479     ds8xSurfaceParams.dwActualHeight = ds8xSurfaceParams.psSurface->dwHeight;
1480 
1481     MOS_ZeroMemory(&ds4xSurfaceParams, sizeof(ds4xSurfaceParams));
1482     ds4xSurfaceParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1483     ds4xSurfaceParams.psSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
1484     ds4xSurfaceParams.ucSurfaceStateId = CODECHAL_MFX_DSRECON_SURFACE_ID;
1485     ds4xSurfaceParams.dwActualWidth = ds4xSurfaceParams.psSurface->dwWidth;
1486     ds4xSurfaceParams.dwActualHeight = ds4xSurfaceParams.psSurface->dwHeight;
1487 }
1488 
//!
//! \brief    Populate the VDEnc pipe buffer address parameters for the current frame
//! \details  Resets pipeBufAddrParams and then fills in the raw and down-scaled
//!           surfaces, the stream-out / stream-in buffers, the L0 (and, unless
//!           m_lowDelay is set, L1) reference resources and the collocated MV
//!           temporal buffer.
//!           Side effects: m_resVdencPakObjCmdStreamOutBuffer is pointed at
//!           m_resMbCodeSurface, m_currGopIFramePOC is updated on I frames, and
//!           slice_temporal_mvp_enable_flag in m_hevcSliceParams may be cleared.
//! \param    [out] pipeBufAddrParams
//!           Parameter set to fill
//!
void CodechalVdencHevcState::SetVdencPipeBufAddrParams(
    MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams)
{
    // Discard whatever the caller passed in and start from an empty-brace-initialized value
    pipeBufAddrParams = {};

    //set MMC flag
    if (m_mmcState->IsMmcEnabled())
    {
        pipeBufAddrParams.bMmcEnabled = true;
    }

    // Surfaces and buffers that do not depend on the reference lists
    pipeBufAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
    pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
    pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
    pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
    pipeBufAddrParams.presVdencStreamOutBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);
    pipeBufAddrParams.dwVdencStatsStreamOutOffset =  0;
    pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencIntraRowStoreScratch);
    // Chained assignment: the member pointer is also updated to reference m_resMbCodeSurface
    pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
    pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1                                              = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
    pipeBufAddrParams.dwNumRefIdxL1ActiveMinus1                                              = m_hevcSliceParams->num_ref_idx_l1_active_minus1;

    // Stream-in buffer selection (only when VDEnc stream-in is enabled):
    //  1. HuC in use with non-native ROI, or a BRC mode (CBR/VBR/QVBR) with MB QP data enabled
    //     -> m_vdencOutputROIStreaminBuffer
    //  2. lookahead pass -> first stream-in buffer
    //  3. otherwise      -> stream-in buffer of the current recycled index
    if (m_vdencStreamInEnabled)
    {
        bool useBrcInDeltaQpMap = m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
                                  m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
                                  m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR;
        if (m_vdencHucUsed && ((m_hevcPicParams->NumROI && !m_vdencNativeROIEnabled) ||
            (useBrcInDeltaQpMap && m_encodeParams.bMbQpDataEnabled)))
        {
            pipeBufAddrParams.presVdencStreamInBuffer = &m_vdencOutputROIStreaminBuffer;
        }
        else if (m_lookaheadPass)
        {
            pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[0];
        }
        else
        {
            pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
        }
    }

    // L0 references occupy entries [0, num_ref_idx_l0_active_minus1] of the reference arrays.
    // Invalid pictures, or pictures whose m_picIdx entry is not valid, leave their entry untouched.
    PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
    for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
    {
        CODEC_PICTURE refPic = l0RefFrameList[refIdx];

        if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
        {
            // L0 references
            uint8_t refPicIdx                             = m_picIdx[refPic.FrameIdx].ucPicIdx;
            pipeBufAddrParams.presVdencReferences[refIdx] = &m_refList[refPicIdx]->sRefReconBuffer.OsResource;

            // 4x/8x DS surface for VDEnc
            uint8_t scaledIdx                              = m_refList[refPicIdx]->ucScalingIdx;
            pipeBufAddrParams.presVdenc4xDsSurface[refIdx] = &(m_trackedBuf->Get4xDsReconSurface(scaledIdx))->OsResource;
            pipeBufAddrParams.presVdenc8xDsSurface[refIdx] = &(m_trackedBuf->Get8xDsReconSurface(scaledIdx))->OsResource;
        }
    }

    // L1 references are only added when m_lowDelay is not set; they are stored
    // directly after the L0 entries (index offset num_ref_idx_l0_active_minus1 + 1).
    if (!m_lowDelay)
    {
        PCODEC_PICTURE l1RefFrameList = m_hevcSliceParams->RefPicList[LIST_1];
        for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l1_active_minus1; refIdx++)
        {
            CODEC_PICTURE refPic = l1RefFrameList[refIdx];

            if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
            {
                // L1 references
                uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
                pipeBufAddrParams.presVdencReferences[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
                    &m_refList[refPicIdx]->sRefReconBuffer.OsResource;

                // 4x/8x DS surface for VDEnc
                uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
                pipeBufAddrParams.presVdenc4xDsSurface[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
                    &(m_trackedBuf->Get4xDsReconSurface(scaledIdx))->OsResource;
                pipeBufAddrParams.presVdenc8xDsSurface[refIdx + m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1] =
                    &(m_trackedBuf->Get8xDsReconSurface(scaledIdx))->OsResource;
            }
        }
    }

    // Index of the tracked buffer holding the collocated picture's MVs; 0xFF means "none"
    uint8_t idxForTempMVP = 0xFF;

    if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
    {
        // NOTE(review): m_refList is indexed with FrameIdx directly here, whereas the
        // reference loops above go through m_picIdx[...].ucPicIdx - confirm this is intended.
        uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
        idxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
    }

    if (idxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
    {
        // Temporal reference MV index is invalid and so disable the temporal MVP
        m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
    }
    else
    {
        // NOTE(review): this branch is also taken with idxForTempMVP == 0xFF when the slice
        // flag is already off; what GetMvTemporalBuffer returns for 0xFF is not visible here.
        pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(idxForTempMVP);
    }

    // Remember the POC of the most recent I frame; it is used further down to disable
    // temporal MVP for low-delay frames whose only L0 reference is that I frame.
    if (m_pictureCodingType == I_TYPE)
    {
        m_currGopIFramePOC = m_hevcPicParams->CurrPicOrderCnt;
    }

    // The slice-level flag must not be set when the sequence-level flag is off
    if (m_hevcSeqParams->sps_temporal_mvp_enable_flag == 0 && m_hevcSliceParams->slice_temporal_mvp_enable_flag == 1)
    {
        CODECHAL_ENCODE_NORMALMESSAGE("Attention: temporal MVP flag is inconsistent between seq and slice.");
        m_hevcSliceParams->slice_temporal_mvp_enable_flag = 0;
    }

    // Low-delay, single L0 reference, current-picture referencing disabled, an I frame POC
    // recorded and temporal MVP still enabled: turn temporal MVP off when that single
    // reference is the recorded I frame (compared by POC).
    if (!m_hevcPicParams->pps_curr_pic_ref_enabled_flag && m_lowDelay && m_hevcSliceParams->num_ref_idx_l0_active_minus1 == 0
        && m_currGopIFramePOC != -1 && m_hevcSliceParams->slice_temporal_mvp_enable_flag != 0)
    {
        auto idx = m_picIdx[m_hevcSliceParams->RefPicList[0][0].FrameIdx].ucPicIdx;

        if (m_refList[idx]->iFieldOrderCnt[0] == m_currGopIFramePOC)
        {
            m_hevcSliceParams->slice_temporal_mvp_enable_flag = 0;
        }
    }

}
1615 
SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE & sliceStateParams)1616 void CodechalVdencHevcState::SetHcpSliceStateCommonParams(MHW_VDBOX_HEVC_SLICE_STATE& sliceStateParams)
1617 {
1618     CODECHAL_ENCODE_FUNCTION_ENTER;
1619 
1620     CodechalEncodeHevcBase::SetHcpSliceStateCommonParams(sliceStateParams);
1621 
1622     sliceStateParams.bVdencInUse = true;
1623     sliceStateParams.bVdencHucInUse     = m_hevcVdencAcqpEnabled || m_brcEnabled;
1624     sliceStateParams.bWeightedPredInUse = m_hevcVdencWeightedPredEnabled;
1625     sliceStateParams.pVdencBatchBuffer  = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx];
1626 
1627     // This bit disables Top intra Reference pixel fetch in VDENC mode.
1628     // In PAK only second pass, this bit should be set to one.
1629     // "IntraRefFetchDisable" in HCP SLICE STATE should be set to 0 in first pass and 1 in subsequent passes.
1630     // For dynamic slice, 2nd pass is still VDEnc + PAK pass, not PAK only pass.
1631     sliceStateParams.bIntraRefFetchDisable = m_pakOnlyPass;
1632 }
1633 
AddHcpPakInsertSliceHeader(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_BATCH_BUFFER batchBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)1634 MOS_STATUS CodechalVdencHevcState::AddHcpPakInsertSliceHeader(
1635     PMOS_COMMAND_BUFFER cmdBuffer,
1636     PMHW_BATCH_BUFFER batchBuffer,
1637     PMHW_VDBOX_HEVC_SLICE_STATE params)
1638 {
1639     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1640 
1641     CODECHAL_ENCODE_FUNCTION_ENTER;
1642 
1643     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1644     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pBsBuffer);
1645     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
1646 
1647     if (cmdBuffer == nullptr && batchBuffer == nullptr)
1648     {
1649         CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to.");
1650         return MOS_STATUS_NULL_POINTER;
1651     }
1652 
1653     // Insert slice header
1654     MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
1655     MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
1656     pakInsertObjectParams.bLastHeader = true;
1657     pakInsertObjectParams.bEmulationByteBitsInsert = true;
1658     pakInsertObjectParams.pBatchBufferForPakSlices = batchBuffer;
1659 
1660     // App does the slice header packing, set the skip count passed by the app
1661     pakInsertObjectParams.uiSkipEmulationCheckCount = params->uiSkipEmulationCheckCount;
1662     pakInsertObjectParams.pBsBuffer = params->pBsBuffer;
1663     pakInsertObjectParams.dwBitSize = params->dwLength;
1664     pakInsertObjectParams.dwOffset = params->dwOffset;
1665     pakInsertObjectParams.bVdencInUse = params->bVdencInUse;
1666 
1667     // For HEVC VDEnc Dynamic Slice
1668     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
1669     if (m_hevcSeqParams->SliceSizeControl)
1670     {
1671         pakInsertObjectParams.bLastHeader = false;
1672         pakInsertObjectParams.bEmulationByteBitsInsert = false;
1673         pakInsertObjectParams.dwBitSize = hevcSlcParams->BitLengthSliceHeaderStartingPortion;
1674         pakInsertObjectParams.bResetBitstreamStartingPos = true;
1675     }
1676 
1677     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
1678         cmdBuffer,
1679         &pakInsertObjectParams));
1680 
1681     if (m_hevcSeqParams->SliceSizeControl)
1682     {
1683         // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
1684         pakInsertObjectParams.bLastHeader = true;
1685         pakInsertObjectParams.dwBitSize = params->dwLength - hevcSlcParams->BitLengthSliceHeaderStartingPortion;
1686         pakInsertObjectParams.dwOffset += ((hevcSlcParams->BitLengthSliceHeaderStartingPortion + 7) / 8);   // Skips the first 5 bytes which is Start Code + Nal Unit Header
1687         pakInsertObjectParams.bResetBitstreamStartingPos = true;
1688         pakInsertObjectParams.bVdencInUse = params->bVdencInUse;
1689         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(
1690             cmdBuffer,
1691             &pakInsertObjectParams));
1692     }
1693 
1694     return eStatus;
1695 }
1696 
AddHcpWeightOffsetStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)1697 MOS_STATUS CodechalVdencHevcState::AddHcpWeightOffsetStateCmd(
1698     PMOS_COMMAND_BUFFER cmdBuffer,
1699     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
1700 {
1701     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1702 
1703     CODECHAL_ENCODE_FUNCTION_ENTER;
1704 
1705     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1706     CODECHAL_ENCODE_CHK_NULL_RETURN(hevcSlcParams);
1707 
1708     MHW_VDBOX_HEVC_WEIGHTOFFSET_PARAMS hcpWeightOffsetParams;
1709     MOS_ZeroMemory(&hcpWeightOffsetParams, sizeof(hcpWeightOffsetParams));
1710 
1711     for (auto k = 0; k < 2; k++) // k=0: LIST_0, k=1: LIST_1
1712     {
1713         // Luma, Chroma offset
1714         for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
1715         {
1716             hcpWeightOffsetParams.LumaOffsets[k][i] = (int16_t)hevcSlcParams->luma_offset[k][i];
1717             // Cb, Cr
1718             for (auto j = 0; j < 2; j++)
1719             {
1720                 hcpWeightOffsetParams.ChromaOffsets[k][i][j] = (int16_t)hevcSlcParams->chroma_offset[k][i][j];
1721             }
1722         }
1723 
1724         // Luma Weight
1725         CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1726             &hcpWeightOffsetParams.LumaWeights[k],
1727             sizeof(hcpWeightOffsetParams.LumaWeights[k]),
1728             &hevcSlcParams->delta_luma_weight[k],
1729             sizeof(hevcSlcParams->delta_luma_weight[k])));
1730 
1731         // Chroma Weight
1732         CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
1733             &hcpWeightOffsetParams.ChromaWeights[k],
1734             sizeof(hcpWeightOffsetParams.ChromaWeights[k]),
1735             &hevcSlcParams->delta_chroma_weight[k],
1736             sizeof(hevcSlcParams->delta_chroma_weight[k])));
1737     }
1738 
1739     if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_P_SLICE || hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
1740     {
1741         hcpWeightOffsetParams.ucList = LIST_0;
1742         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(cmdBuffer, nullptr, &hcpWeightOffsetParams));
1743     }
1744 
1745     if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE)
1746     {
1747         hcpWeightOffsetParams.ucList = LIST_1;
1748         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpWeightOffsetStateCmd(cmdBuffer, nullptr, &hcpWeightOffsetParams));
1749     }
1750 
1751     return eStatus;
1752 }
1753 
AddVdencWeightOffsetStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)1754 MOS_STATUS CodechalVdencHevcState::AddVdencWeightOffsetStateCmd(
1755     PMOS_COMMAND_BUFFER cmdBuffer,
1756     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams)
1757 {
1758     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1759 
1760     CODECHAL_ENCODE_FUNCTION_ENTER;
1761 
1762     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1763     CODECHAL_ENCODE_CHK_NULL_RETURN(hevcSlcParams);
1764 
1765     MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
1766     MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
1767 
1768     vdencWeightOffsetParams.bWeightedPredEnabled = m_hevcVdencWeightedPredEnabled;
1769 
1770     if (vdencWeightOffsetParams.bWeightedPredEnabled)
1771     {
1772         vdencWeightOffsetParams.dwDenom = 1 << (hevcSlcParams->luma_log2_weight_denom);
1773 
1774         // Luma offset
1775         for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
1776         {
1777             vdencWeightOffsetParams.LumaOffsets[0][i] = (int16_t)hevcSlcParams->luma_offset[0][i];
1778             vdencWeightOffsetParams.LumaOffsets[1][i] = (int16_t)hevcSlcParams->luma_offset[1][i];
1779         }
1780 
1781         // Luma Weight
1782         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
1783             &vdencWeightOffsetParams.LumaWeights[0],
1784             sizeof(vdencWeightOffsetParams.LumaWeights[0]),
1785             &hevcSlcParams->delta_luma_weight[0],
1786             sizeof(hevcSlcParams->delta_luma_weight[0])),
1787             "Failed to copy luma weight 0 memory.");
1788 
1789         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(MOS_SecureMemcpy(
1790             &vdencWeightOffsetParams.LumaWeights[1],
1791             sizeof(vdencWeightOffsetParams.LumaWeights[1]),
1792             &hevcSlcParams->delta_luma_weight[1],
1793             sizeof(hevcSlcParams->delta_luma_weight[1])),
1794             "Failed to copy luma weight 1 memory.");
1795     }
1796 
1797     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(
1798         cmdBuffer,
1799         nullptr,
1800         &vdencWeightOffsetParams));
1801 
1802     return eStatus;
1803 }
1804 
AddVdencWalkerStateCmd(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)1805 MOS_STATUS CodechalVdencHevcState::AddVdencWalkerStateCmd(
1806     PMOS_COMMAND_BUFFER cmdBuffer,
1807     PMHW_VDBOX_HEVC_SLICE_STATE params)
1808 {
1809     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1810 
1811     CODECHAL_ENCODE_FUNCTION_ENTER;
1812 
1813     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
1814     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
1815 
1816     MHW_VDBOX_VDENC_WALKER_STATE_PARAMS vdencWalkerStateParams;
1817     vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_HEVC;
1818     vdencWalkerStateParams.pHevcEncSeqParams = params->pEncodeHevcSeqParams;
1819     vdencWalkerStateParams.pHevcEncPicParams = params->pEncodeHevcPicParams;
1820     vdencWalkerStateParams.pEncodeHevcSliceParams = params->pEncodeHevcSliceParams;
1821     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
1822 
1823     return eStatus;
1824 }
1825 
ReadBrcPakStats(PMOS_COMMAND_BUFFER cmdBuffer)1826 MOS_STATUS CodechalVdencHevcState::ReadBrcPakStats(
1827     PMOS_COMMAND_BUFFER cmdBuffer)
1828 {
1829     CODECHAL_ENCODE_FUNCTION_ENTER;
1830 
1831     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
1832 
1833     uint32_t offset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
1834         m_encodeStatusBuf.dwNumPassesOffset +   // Num passes offset
1835         sizeof(uint32_t) * 2;                               // encodeStatus is offset by 2 DWs in the resource
1836 
1837     EncodeReadBrcPakStatsParams   readBrcPakStatsParams;
1838     readBrcPakStatsParams.pHwInterface = m_hwInterface;
1839     readBrcPakStatsParams.presBrcPakStatisticBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
1840     readBrcPakStatsParams.presStatusBuffer = &m_encodeStatusBuf.resStatusBuffer;
1841     readBrcPakStatsParams.dwStatusBufNumPassesOffset = offset;
1842     readBrcPakStatsParams.ucPass = (uint8_t) GetCurrentPass();
1843     readBrcPakStatsParams.VideoContext = m_videoContext;
1844 
1845     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatistics(
1846         cmdBuffer,
1847         &readBrcPakStatsParams));
1848 
1849     return eStatus;
1850 }
1851 
StoreVdencStatistics(PMOS_COMMAND_BUFFER cmdBuffer)1852 MOS_STATUS CodechalVdencHevcState::StoreVdencStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
1853 {
1854     CODECHAL_ENCODE_FUNCTION_ENTER;
1855 
1856     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1857 
1858     uint32_t offset = sizeof(CodechalVdencHevcLaStats) * m_currLaDataIdx;
1859     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
1860     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
1861     miCpyMemMemParams.presSrc = m_resVdencStatsBuffer; // 8X8 Normalized intra CU count is in m_resVdencStatsBuffer DW1
1862     miCpyMemMemParams.dwSrcOffset = 4;
1863     miCpyMemMemParams.presDst = &m_vdencLaStatsBuffer;
1864     miCpyMemMemParams.dwDstOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, intraCuCount);
1865     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
1866 
1867     return eStatus;
1868 }
1869 
StoreLookaheadStatistics(PMOS_COMMAND_BUFFER cmdBuffer)1870 MOS_STATUS CodechalVdencHevcState::StoreLookaheadStatistics(PMOS_COMMAND_BUFFER cmdBuffer)
1871 {
1872     CODECHAL_ENCODE_FUNCTION_ENTER;
1873 
1874     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1875 
1876     if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())                                                                         \
1877     {
1878         CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1879         eStatus = MOS_STATUS_INVALID_PARAMETER;
1880         return eStatus;
1881     }
1882 
1883     auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
1884     CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
1885     uint32_t offset = sizeof(CodechalVdencHevcLaStats) * m_currLaDataIdx;
1886 
1887     MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
1888     MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
1889     miStoreRegMemParams.presStoreBuffer = &m_vdencLaStatsBuffer;
1890     miStoreRegMemParams.dwOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, frameByteCount);
1891     miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
1892     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
1893 
1894     // Calculate header size including LCU header
1895     uint32_t headerBitSize = 0;
1896     for (uint32_t i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
1897     {
1898         headerBitSize += m_nalUnitParams[i]->uiSize * 8;
1899     }
1900     for (uint32_t i = 0; i < m_numSlices; i++)
1901     {
1902         headerBitSize += m_slcData[i].BitSize;
1903     }
1904 
1905     // Store to headerBitCount in CodechalVdencHevcLaStats
1906     MHW_MI_STORE_DATA_PARAMS storeDataParams;
1907     storeDataParams.pOsResource      = &m_vdencLaStatsBuffer;
1908     storeDataParams.dwResourceOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, headerBitCount);
1909     storeDataParams.dwValue          = headerBitSize;
1910     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
1911 
1912     auto                            mmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex);
1913     MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
1914     MHW_MI_FLUSH_DW_PARAMS          flushDwParams;
1915     MHW_MI_ATOMIC_PARAMS            atomicParams;
1916 
1917     MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
1918     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
1919     MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
1920     // VCS_GPR0_Lo = LCUHdrBits
1921     miLoadRegMemParams.presStoreBuffer = &m_resFrameStatStreamOutBuffer;  // LCUHdrBits is in m_resFrameStatStreamOutBuffer DW4
1922     miLoadRegMemParams.dwOffset        = 4 * sizeof(uint32_t);
1923     miLoadRegMemParams.dwRegister      = mmioRegistersMfx->generalPurposeRegister0LoOffset;  // VCS_GPR0_Lo
1924     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
1925 
1926     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1927         cmdBuffer,
1928         &flushDwParams));
1929 
1930     // frame headerBitCount += LCUHdrBits
1931     atomicParams.pOsResource      = &m_vdencLaStatsBuffer;
1932     atomicParams.dwResourceOffset = offset + CODECHAL_OFFSETOF(CodechalVdencHevcLaStats, headerBitCount);
1933     atomicParams.dwDataSize       = sizeof(uint32_t);
1934     atomicParams.Operation        = MHW_MI_ATOMIC_ADD;
1935     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(
1936         cmdBuffer,
1937         &atomicParams));
1938 
1939     CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreVdencStatistics(cmdBuffer));
1940 
1941     return eStatus;
1942 }
1943 
ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)1944 MOS_STATUS CodechalVdencHevcState::ReadSliceSize(PMOS_COMMAND_BUFFER cmdBuffer)
1945 {
1946     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1947 
1948     CODECHAL_ENCODE_FUNCTION_ENTER;
1949 
1950     // Report slice size to app only when dynamic slice is enabled
1951     if (!m_hevcSeqParams->SliceSizeControl)
1952     {
1953         return eStatus;
1954     }
1955 
1956     MOS_LOCK_PARAMS lockFlags;
1957     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1958     lockFlags.WriteOnly = true;
1959 
1960     uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize + sizeof(uint32_t) * 2);  // encodeStatus is offset by 2 DWs in the resource
1961     uint32_t sizeOfSliceSizesBuffer = MOS_ALIGN_CEIL(CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1962 
1963     if (IsFirstPass())
1964     {
1965         // Create/ Initialize slice report buffer once per frame, to be used across passes
1966         if (Mos_ResourceIsNull(&m_resSliceReport[m_encodeStatusBuf.wCurrIndex]))
1967         {
1968             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
1969             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
1970             allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
1971             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
1972             allocParamsForBufferLinear.Format = Format_Buffer;
1973             allocParamsForBufferLinear.dwBytes = sizeOfSliceSizesBuffer;
1974 
1975             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
1976                 m_osInterface,
1977                 &allocParamsForBufferLinear,
1978                 &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]),
1979                 "Failed to create HEVC VDEnc Slice Report Buffer ");
1980         }
1981 
1982         // Clear slice size structure to be sent in EncodeStatusReport buffer
1983         uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], &lockFlags);
1984         CODECHAL_ENCODE_CHK_NULL_RETURN(data);
1985         MOS_ZeroMemory(data, sizeOfSliceSizesBuffer);
1986         m_osInterface->pfnUnlockResource(m_osInterface, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex]);
1987 
1988         // Set slice size pointer in slice size structure
1989         MHW_MI_FLUSH_DW_PARAMS  miFlushDwParams;
1990         MOS_ZeroMemory(&miFlushDwParams, sizeof(miFlushDwParams));
1991         miFlushDwParams.pOsResource      = &m_encodeStatusBuf.resStatusBuffer;
1992         miFlushDwParams.dwResourceOffset = CODECHAL_OFFSETOF(EncodeStatusSliceReport, pSliceSize) + baseOffset + m_encodeStatusBuf.dwSliceReportOffset;
1993         miFlushDwParams.dwDataDW1        = (uint32_t)((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF);
1994         miFlushDwParams.dwDataDW2        = (uint32_t)(((uint64_t)&m_resSliceReport[m_encodeStatusBuf.wCurrIndex] & 0xFFFFFFFF00000000) >> 32);
1995         miFlushDwParams.bQWordEnable     = 1;
1996         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(
1997             cmdBuffer,
1998             &miFlushDwParams));
1999     }
2000 
2001     // Copy Slize size data buffer from PAK to be sent back to App
2002     CODECHAL_ENCODE_CHK_STATUS_RETURN(CopyDataBlock(cmdBuffer,
2003         &m_resLcuBaseAddressBuffer, 0, &m_resSliceReport[m_encodeStatusBuf.wCurrIndex], 0, sizeOfSliceSizesBuffer));
2004 
2005     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
2006     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
2007     miCpyMemMemParams.presSrc       = &m_resFrameStatStreamOutBuffer; // Slice size overflow is in m_resFrameStatStreamOutBuffer DW0[16]
2008     miCpyMemMemParams.dwSrcOffset   = 0;
2009     miCpyMemMemParams.presDst       = &m_encodeStatusBuf.resStatusBuffer;
2010     miCpyMemMemParams.dwDstOffset   = baseOffset + m_encodeStatusBuf.dwSliceReportOffset;     // Slice size overflow is at DW0 EncodeStatusSliceReport
2011     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
2012 
2013 
2014     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(MHW_MI_COPY_MEM_MEM_PARAMS));
2015     miCpyMemMemParams.presSrc       = m_resSliceCountBuffer; // Number of slice sizes are stored in this buffer. Updated at runtime
2016     miCpyMemMemParams.dwSrcOffset   = 0;
2017     miCpyMemMemParams.presDst       = &m_encodeStatusBuf.resStatusBuffer;
2018     miCpyMemMemParams.dwDstOffset   = baseOffset + m_encodeStatusBuf.dwSliceReportOffset + 1;     // Num slices is located at DW1 EncodeStatusSliceReport
2019     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams));
2020 
2021     return eStatus;
2022 }
2023 
CopyDataBlock(PMOS_COMMAND_BUFFER cmdBuffer,PMOS_RESOURCE sourceSurface,uint32_t sourceOffset,PMOS_RESOURCE destSurface,uint32_t destOffset,uint32_t copySize)2024 MOS_STATUS CodechalVdencHevcState::CopyDataBlock(
2025     PMOS_COMMAND_BUFFER cmdBuffer,
2026     PMOS_RESOURCE sourceSurface,
2027     uint32_t sourceOffset,
2028     PMOS_RESOURCE destSurface,
2029     uint32_t destOffset,
2030     uint32_t copySize)
2031 {
2032     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2033 
2034     CODECHAL_ENCODE_FUNCTION_ENTER;
2035 
2036     CodechalHucStreamoutParams hucStreamOutParams;
2037     MOS_ZeroMemory(&hucStreamOutParams, sizeof(hucStreamOutParams));
2038 
2039     // Ind Obj Addr command
2040     hucStreamOutParams.dataBuffer            = sourceSurface;
2041     hucStreamOutParams.dataSize              = copySize + sourceOffset;
2042     hucStreamOutParams.dataOffset            = MOS_ALIGN_FLOOR(sourceOffset, CODECHAL_PAGE_SIZE);
2043     hucStreamOutParams.streamOutObjectBuffer = destSurface;
2044     hucStreamOutParams.streamOutObjectSize   = copySize + destOffset;
2045     hucStreamOutParams.streamOutObjectOffset = MOS_ALIGN_FLOOR(destOffset, CODECHAL_PAGE_SIZE);
2046 
2047     // Stream object params
2048     hucStreamOutParams.indStreamInLength     = copySize;
2049     hucStreamOutParams.inputRelativeOffset   = sourceOffset - hucStreamOutParams.dataOffset;
2050     hucStreamOutParams.outputRelativeOffset  = destOffset - hucStreamOutParams.streamOutObjectOffset;
2051 
2052 
2053     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->PerformHucStreamOut(
2054         &hucStreamOutParams,
2055         cmdBuffer));
2056 
2057     // wait Huc completion (use HEVC bit for now)
2058     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
2059     MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
2060     vdPipeFlushParams.Flags.bFlushHEVC       = 1;
2061     vdPipeFlushParams.Flags.bWaitDoneHEVC    = 1;
2062     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
2063 
2064     // Flush the engine to ensure memory written out
2065     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2066     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2067     flushDwParams.bVideoPipelineCacheInvalidate = true;
2068     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
2069 
2070     return eStatus;
2071 }
2072 
ExecutePictureLevel()2073 MOS_STATUS CodechalVdencHevcState::ExecutePictureLevel()
2074 {
2075     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2076 
2077     CODECHAL_ENCODE_FUNCTION_ENTER;
2078 
2079     PerfTagSetting perfTag;
2080     perfTag.Value             = 0;
2081     perfTag.Mode              = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2082     perfTag.CallType          = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2083     perfTag.PictureCodingType = m_pictureCodingType;
2084     m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2085 
2086     if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex())                                                                         \
2087     {
2088         CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
2089         eStatus = MOS_STATUS_INVALID_PARAMETER;
2090         return eStatus;
2091     }
2092 
2093     CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
2094 
2095     if (!m_singleTaskPhaseSupportedInPak)
2096     {
2097         // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together
2098         m_firstTaskInPhase = true;
2099         m_lastTaskInPhase = true;
2100     }
2101 
2102     // PAK pass type for each pass: VDEnc+PAK vs. PAK-only
2103     SetPakPassType();
2104 
2105     bool pakOnlyMultipassEnable;
2106 
2107     pakOnlyMultipassEnable = m_pakOnlyPass;
2108 
2109     bool panicEnabled = (m_brcEnabled) && (m_panicEnable) && (GetCurrentPass() == 1) && !m_pakOnlyPass;
2110 
2111     uint32_t rollingILimit = (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_ROW) ? MOS_ROUNDUP_DIVIDE(m_frameHeight, 32) : (m_hevcPicParams->bEnableRollingIntraRefresh == ROLLING_I_COLUMN) ? MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) : 0;
2112 
2113     m_refList[m_currReconstructedPic.FrameIdx]->rollingIntraRefreshedPosition =
2114         CodecHal_Clip3(0, rollingILimit, m_hevcPicParams->IntraInsertionLocation + m_hevcPicParams->IntraInsertionSize);
2115 
2116     // For ACQP / BRC, update pic params rolling intra reference location here before cmd buffer is prepared.
2117     PCODEC_PICTURE l0RefFrameList = m_hevcSliceParams->RefPicList[LIST_0];
2118     for (uint8_t refIdx = 0; refIdx <= m_hevcSliceParams->num_ref_idx_l0_active_minus1; refIdx++)
2119     {
2120         CODEC_PICTURE refPic = l0RefFrameList[refIdx];
2121 
2122         if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid)
2123         {
2124             uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
2125             m_hevcPicParams->RollingIntraReferenceLocation[refIdx] = m_refList[refPicIdx]->rollingIntraRefreshedPosition;
2126         }
2127     }
2128 
2129     // clean-up per VDBOX semaphore memory
2130     int32_t currentPass = GetCurrentPass();
2131 
2132     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerSetConstData(
2133         m_osInterface,
2134         m_miInterface,
2135         m_vdencInterface,
2136         m_hevcSeqParams,
2137         m_hevcPicParams,
2138         m_hevcSliceParams,
2139         m_pakOnlyPass,
2140         m_hevcVdencAcqpEnabled,
2141         m_brcEnabled,
2142         m_vdencStreamInEnabled,
2143         m_vdencNativeROIEnabled,
2144         m_brcAdaptiveRegionBoostEnable,
2145         m_hevcVdencRoundingEnabled,
2146         panicEnabled,
2147         currentPass));
2148 
2149     // Send HuC BRC Init/ Update only on first pipe.
2150     if (m_vdencHucUsed)
2151     {
2152         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerExecute(true, &m_vdencReadBatchBuffer[m_currRecycledBufIdx][currentPass]));
2153 
2154         if (!m_singleTaskPhaseSupported)
2155         {
2156             //Reset earlier set PAK perf tag
2157             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2158 
2159             // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2160             perfTag.Value                = 0;
2161             perfTag.Mode                 = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2162             perfTag.CallType             = CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET;
2163             perfTag.PictureCodingType    = m_pictureCodingType;
2164             m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2165         }
2166         m_resVdencBrcUpdateDmemBufferPtr[0] = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakInfo);
2167 
2168         // Invoke BRC init/reset FW
2169         if (m_brcInit || m_brcReset)
2170         {
2171             CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
2172         }
2173 
2174         if (!m_singleTaskPhaseSupported)
2175         {
2176             //Reset performance buffer used for BRC init
2177             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2178             // STF: HuC+VDEnc+PAK single BB, non-STF: HuC Init/HuC Update/(VDEnc+PAK) in separate BBs
2179             perfTag.Value                = 0;
2180             perfTag.Mode                 = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2181             perfTag.CallType             = CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE;
2182             perfTag.PictureCodingType    = m_pictureCodingType;
2183             m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2184         }
2185 
2186         // Invoke BRC update FW
2187         CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
2188         m_brcInit = m_brcReset = false;
2189         if (!m_singleTaskPhaseSupported)
2190         {
2191             //reset performance buffer used for BRC update
2192             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2193         }
2194     }
2195     else
2196     {
2197         ConstructBatchBufferHuCCQP(&m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource);
2198     }
2199 
2200     MOS_COMMAND_BUFFER cmdBuffer;
2201     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2202 
2203     if (!m_singleTaskPhaseSupported)
2204     {
2205         //PAK Perf Tag
2206         perfTag.Value             = 0;
2207         perfTag.Mode              = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
2208         perfTag.CallType          = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
2209         perfTag.PictureCodingType = m_pictureCodingType;
2210         m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
2211     }
2212 
2213     if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_hevcVdencAcqpEnabled)) )
2214     {
2215         // Send command buffer header at the beginning (OS dependent)
2216         // frame tracking tag is only added in the last command buffer header
2217         bool requestFrameTracking = m_singleTaskPhaseSupported ?
2218             m_firstTaskInPhase :
2219             m_lastTaskInPhase;
2220 
2221         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
2222     }
2223 
2224     // ACQP + SSC, ACQP + WP, BRC, BRC + SSC, BRC + WP
2225     // 2nd pass for SSC, WP, BRC needs conditional batch buffer end cmd, which is decided by HUC_STATUS output from HuC
2226     if (currentPass && m_vdencHuCConditional2ndPass && (currentPass != m_uc2NdSaoPass))
2227     {
2228         MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
2229 
2230         // Insert conditional batch buffer end
2231         MOS_ZeroMemory(
2232             &miConditionalBatchBufferEndParams,
2233             sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
2234 
2235         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
2236         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
2237             &m_resPakMmioBuffer;
2238 
2239         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
2240             &cmdBuffer,
2241             &miConditionalBatchBufferEndParams));
2242 
2243             auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2244             CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2245             uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
2246 
2247             // Write back the HCP image control register for RC6 may clean it out
2248             MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
2249             MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
2250             miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2251             miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset;
2252             miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2253             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams));
2254 
2255             MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2256             MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2257             miStoreRegMemParams.presStoreBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2258             miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
2259             miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2260             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2261 
2262             MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2263             miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer;
2264             miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset;
2265             miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2266             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams));
2267     }
2268 
2269     if (!currentPass && m_osInterface->bTagResourceSync)
2270     {
2271         // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
2272         // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
2273         // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
2274         // as long as Dec/VP/Enc won't depend on this PAK so soon.
2275 
2276         PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
2277 
2278         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
2279             m_osInterface,
2280             globalGpuContextSyncTagBuffer));
2281         CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
2282 
2283         MHW_MI_STORE_DATA_PARAMS params;
2284         params.pOsResource = globalGpuContextSyncTagBuffer;
2285         params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2286         uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
2287         params.dwValue = (value > 0) ? (value - 1) : 0;
2288         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &params));
2289     }
2290 
2291     if (!m_lookaheadPass || m_swLaMode)
2292     {
2293         CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2294     }
2295 
2296     MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams;
2297     SetHcpSrcSurfaceParams(srcSurfaceParams);
2298 
2299     MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams;
2300     SetHcpReconSurfaceParams(reconSurfaceParams);
2301 
2302     *m_pipeBufAddrParams = {};
2303     SetHcpPipeBufAddrParams(*m_pipeBufAddrParams);
2304     m_pipeBufAddrParams->pRawSurfParam = &srcSurfaceParams;
2305     m_pipeBufAddrParams->pDecodedReconParam = &reconSurfaceParams;
2306     m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams, &cmdBuffer);
2307 
2308     SetHcpPipeModeSelectParams(*m_pipeModeSelectParams);
2309 
2310     // HuC modifies HCP pipe mode select command, when 2nd pass SAO is required
2311     if (m_vdencHucUsed && m_b2NdSaoPassNeeded)
2312     {
2313         // current location to add cmds in 2nd level batch buffer
2314         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2315         // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2316         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2317 
2318         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2319 
2320         // save offset for next 2nd level batch buffer usage
2321         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2322     }
2323     else
2324     {
2325         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2326     }
2327 
2328     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &srcSurfaceParams));
2329 
2330     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &reconSurfaceParams));
2331 
2332     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2333 
2334     MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
2335     SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
2336     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
2337 
2338     MHW_VDBOX_QM_PARAMS fqmParams, qmParams;
2339     SetHcpQmStateParams(fqmParams, qmParams);
2340     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams));
2341     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams));
2342 
2343     SetVdencPipeModeSelectParams(*m_pipeModeSelectParams);
2344     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, m_pipeModeSelectParams));
2345 
2346     MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2];
2347     SetVdencSurfaceStateParams(srcSurfaceParams, reconSurfaceParams, dsSurfaceParams[0], dsSurfaceParams[1]);
2348     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &srcSurfaceParams));
2349     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &reconSurfaceParams));
2350     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
2351 
2352     SetVdencPipeBufAddrParams(*m_pipeBufAddrParams);
2353     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, m_pipeBufAddrParams));
2354 
2355     MHW_VDBOX_HEVC_PIC_STATE picStateParams;
2356     SetHcpPicStateParams(picStateParams);
2357 
2358     if (m_vdencHucUsed)
2359     {
2360         // 2nd level batch buffer
2361         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
2362         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2363 
2364         // save offset for next 2nd level batch buffer usage
2365         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2366     }
2367     else
2368     {
2369         // current location to add cmds in 2nd level batch buffer
2370         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].iCurrent = 0;
2371         // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
2372         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = 0;
2373 
2374         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx]));
2375         m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2376     }
2377 
2378     // Send HEVC_VP9_RDOQ_STATE command
2379     if (m_hevcRdoqEnabled)
2380     {
2381         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams));
2382     }
2383 
2384     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2385 
2386     return eStatus;
2387 }
2388 
SendHwSliceEncodeCommand(PMOS_COMMAND_BUFFER cmdBuffer,PMHW_VDBOX_HEVC_SLICE_STATE params)2389 MOS_STATUS CodechalVdencHevcState::SendHwSliceEncodeCommand(
2390     PMOS_COMMAND_BUFFER cmdBuffer,
2391     PMHW_VDBOX_HEVC_SLICE_STATE params)
2392 {
2393     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2394 
2395     CODECHAL_ENCODE_FUNCTION_ENTER;
2396 
2397     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2398     CODECHAL_ENCODE_CHK_NULL_RETURN(params);
2399     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pHevcPicIdx);
2400     CODECHAL_ENCODE_CHK_NULL_RETURN(params->presDataBuffer);
2401     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSeqParams);
2402     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams);
2403     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams);
2404     CODECHAL_ENCODE_CHK_NULL_RETURN(params->pBsBuffer);
2405     CODECHAL_ENCODE_CHK_NULL_RETURN(params->ppNalUnitParams);
2406 
2407     PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams;
2408     PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams;
2409 
2410     // VDENC does not use batch buffer for slice state
2411     // add HCP_REF_IDX command
2412     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpRefIdxCmd(cmdBuffer, nullptr, params));
2413 
2414     if (params->bVdencHucInUse)
2415     {
2416         // 2nd level batch buffer
2417         PMHW_BATCH_BUFFER secondLevelBatchBufferUsed = params->pVdencBatchBuffer;
2418         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(cmdBuffer, secondLevelBatchBufferUsed));
2419     }
2420     else
2421     {
2422         // Weighted Prediction
2423         // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
2424         // If zero, then this command is not issued.
2425         if (params->bWeightedPredInUse)
2426         {
2427             CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpWeightOffsetStateCmd(cmdBuffer, hevcSlcParams));
2428         }
2429 
2430         // add HEVC Slice state commands
2431         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSliceStateCmd(cmdBuffer, params));
2432 
2433         // add HCP_PAK_INSERT_OBJECTS command
2434         CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPakInsertNALUs(cmdBuffer, params->pVdencBatchBuffer, params));
2435 
2436         CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPakInsertSliceHeader(cmdBuffer, params->pVdencBatchBuffer, params));
2437 
2438         // Send VDENC_WEIGHT_OFFSETS_STATE command
2439         CODECHAL_ENCODE_CHK_STATUS_RETURN(AddVdencWeightOffsetStateCmd(cmdBuffer, hevcSlcParams));
2440     }
2441 
2442     // Send VDENC_WALKER_STATE command
2443     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddVdencWalkerStateCmd(cmdBuffer, params));
2444 
2445     return eStatus;
2446 }
2447 
ExecuteSliceLevel()2448 MOS_STATUS CodechalVdencHevcState::ExecuteSliceLevel()
2449 {
2450     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2451 
2452     CODECHAL_ENCODE_FUNCTION_ENTER;
2453 
2454     CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBatchBufferForPakSlices());
2455 
2456     MOS_COMMAND_BUFFER cmdBuffer;
2457     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2458 
2459     CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceStateParams);
2460     SetHcpSliceStateCommonParams(*m_sliceStateParams);
2461 
2462     // starting location for executing slice level cmds
2463     m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
2464 
2465     PCODEC_ENCODER_SLCDATA slcData = m_slcData;
2466     for (uint32_t startLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++)
2467     {
2468         if (IsFirstPass())
2469         {
2470             slcData[slcCount].CmdOffset = startLcu * (m_hcpInterface->GetHcpPakObjSize()) * sizeof(uint32_t);
2471         }
2472 
2473         SetHcpSliceStateParams(*m_sliceStateParams, slcData, slcCount);
2474 
2475         CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, m_sliceStateParams));
2476 
2477         startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice;
2478 
2479         m_batchBufferForPakSlicesStartOffset = (uint32_t)m_batchBufferForPakSlices[m_currPakSliceIdx].iCurrent;
2480 
2481         if (m_hevcVdencAcqpEnabled || m_brcEnabled)
2482         {
2483             // save offset for next 2nd level batch buffer usage
2484             // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
2485             // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
2486             // m_vdencBatchBufferPerSliceVarSize:   variable size for each slice
2487             m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].dwOffset += m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_vdencBatchBufferPerSliceVarSize[slcCount];
2488         }
2489 
2490         // Send VD_PIPELINE_FLUSH command
2491         MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2492         MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2493         vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
2494         vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
2495         vdPipelineFlushParams.Flags.bFlushVDENC = 1;
2496         vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2497         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2498     }
2499 
2500     if (m_useBatchBufferForPakSlices)
2501     {
2502         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
2503             m_osInterface,
2504             &m_batchBufferForPakSlices[m_currPakSliceIdx],
2505             m_lastTaskInPhase));
2506     }
2507 
2508     // Insert end of sequence/stream if set
2509     if (m_lastPicInStream || m_lastPicInSeq)
2510     {
2511         MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams;
2512         MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams));
2513         pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq;
2514         pakInsertObjectParams.bLastPicInStream = m_lastPicInStream;
2515         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams));
2516     }
2517 
2518     // Send MI_FLUSH command
2519     MHW_MI_FLUSH_DW_PARAMS flushDwParams;
2520     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2521     flushDwParams.bVideoPipelineCacheInvalidate = true;
2522     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2523 
2524     // Send VD_PIPELINE_FLUSH command
2525     MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
2526     MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
2527     vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
2528     vdPipelineFlushParams.Flags.bFlushHEVC = 1;
2529     vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
2530     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
2531 
2532     // Send MI_FLUSH command
2533     MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
2534     flushDwParams.bVideoPipelineCacheInvalidate = true;
2535     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
2536 
2537     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
2538 
2539     // BRC PAK statistics different for each pass
2540     if (m_brcEnabled)
2541     {
2542         uint32_t offset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
2543             m_encodeStatusBuf.dwNumPassesOffset +   // Num passes offset
2544             sizeof(uint32_t) * 2;                               // encodeStatus is offset by 2 DWs in the resource
2545 
2546         EncodeReadBrcPakStatsParams readBrcPakStatsParams;
2547         readBrcPakStatsParams.pHwInterface = m_hwInterface;
2548         readBrcPakStatsParams.presBrcPakStatisticBuffer = &m_vdencBrcBuffers.resBrcPakStatisticBuffer[m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite];
2549         readBrcPakStatsParams.presStatusBuffer = &m_encodeStatusBuf.resStatusBuffer;
2550         readBrcPakStatsParams.dwStatusBufNumPassesOffset = offset;
2551         readBrcPakStatsParams.ucPass = (uint8_t) GetCurrentPass();
2552         readBrcPakStatsParams.VideoContext = m_videoContext;
2553 
2554         CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatistics(
2555             &cmdBuffer,
2556             &readBrcPakStatsParams));
2557     }
2558 
2559     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer));
2560     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSliceSize(&cmdBuffer));
2561     CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(&cmdBuffer));
2562 
2563     if (m_lookaheadPass)
2564     {
2565         CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreLookaheadStatistics(&cmdBuffer));
2566     }
2567 #if USE_CODECHAL_DEBUG_TOOL
2568     if (m_brcEnabled && m_enableFakeHrdSize)
2569     {
2570         uint32_t sizeInByte = (m_pictureCodingType == I_TYPE) ? m_fakeIFrameHrdSize : m_fakePBFrameHrdSize;
2571         CODECHAL_ENCODE_CHK_STATUS_RETURN(ModifyEncodedFrameSizeWithFakeHeaderSize(
2572             &cmdBuffer,
2573             sizeInByte,
2574             m_resVdencBrcUpdateDmemBufferPtr[0],
2575             0,
2576             &m_resFrameStatStreamOutBuffer,
2577             sizeof(uint32_t) * 4));
2578     }
2579 #endif
2580 
2581     if (!m_lookaheadPass || m_swLaMode)
2582     {
2583         CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
2584     }
2585 
2586     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2587     {
2588         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
2589     }
2590 
2591     std::string pakPassName = "PAK_PASS[" + std::to_string(GetCurrentPass())+"]";
2592     CODECHAL_DEBUG_TOOL(
2593         CODECHAL_ENCODE_CHK_STATUS_RETURN( m_debugInterface->DumpCmdBuffer(
2594             &cmdBuffer,
2595             CODECHAL_NUM_MEDIA_STATES,
2596             pakPassName.data()));)
2597 
2598     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
2599 
2600     if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
2601     {
2602         bool renderingFlags = m_videoContextUsesNullHw;
2603 
2604         CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
2605 
2606         CODECHAL_DEBUG_TOOL(
2607             if (m_mmcState)
2608             {
2609                 m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
2610             }
2611         )
2612 
2613         if (IsLastPass() &&
2614             m_signalEnc &&
2615             m_currRefSync &&
2616             !Mos_ResourceIsNull(&m_currRefSync->resSyncObject))
2617         {
2618             // signal semaphore
2619             MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
2620             syncParams.GpuContext = m_videoContext;
2621             syncParams.presSyncResource = &m_currRefSync->resSyncObject;
2622 
2623             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
2624             m_currRefSync->uiSemaphoreObjCount++;
2625             m_currRefSync->bInUsed = true;
2626         }
2627     }
2628 
2629     // HuC FW outputs are ready at this point if single task phase is enabled
2630     if (m_vdencHucUsed && m_singleTaskPhaseSupported)
2631     {
2632         // HuC Output STF=1
2633         CODECHAL_DEBUG_TOOL(DumpHucBrcUpdate(false));
2634     }
2635 
2636     // Reset parameters for next PAK execution
2637     if (IsLastPass())
2638     {
2639         if (!m_singleTaskPhaseSupported)
2640         {
2641             m_osInterface->pfnResetPerfBufferID(m_osInterface);
2642         }
2643 
2644         m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS;
2645 
2646         if (m_hevcSeqParams->ParallelBRC)
2647         {
2648             m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
2649                 (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
2650         }
2651 
2652         m_newPpsHeader = 0;
2653         m_newSeqHeader = 0;
2654         m_frameNum++;
2655     }
2656 
2657     return eStatus;
2658 }
2659 
ReadHcpStatus(PMOS_COMMAND_BUFFER cmdBuffer)2660 MOS_STATUS CodechalVdencHevcState::ReadHcpStatus(PMOS_COMMAND_BUFFER cmdBuffer)
2661 {
2662     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2663 
2664     CODECHAL_ENCODE_FUNCTION_ENTER;
2665 
2666     CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
2667 
2668     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::ReadHcpStatus(cmdBuffer));
2669 
2670     auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
2671     CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
2672     // Slice Size Conformance
2673     if (m_hevcSeqParams->SliceSizeControl)
2674     {
2675         MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2676         MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2677         miStoreRegMemParams.presStoreBuffer = m_resSliceCountBuffer;
2678         miStoreRegMemParams.dwOffset = 0;
2679         miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncSliceCountRegOffset;
2680         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2681 
2682         MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2683         miStoreRegMemParams.presStoreBuffer = m_resVdencModeTimerBuffer;
2684         miStoreRegMemParams.dwOffset = 0;
2685         miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncVdencModeTimerRegOffset;
2686         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2687     }
2688 
2689     if (m_vdencHucUsed)
2690     {
2691         // Store PAK frameSize MMIO to PakInfo buffer
2692         MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
2693         MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
2694         miStoreRegMemParams.presStoreBuffer = m_resVdencBrcUpdateDmemBufferPtr[0];
2695         miStoreRegMemParams.dwOffset = 0;
2696         miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2697         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
2698     }
2699 
2700     CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(cmdBuffer))
2701 
2702     return eStatus;
2703 }
2704 
SetSequenceStructs()2705 MOS_STATUS CodechalVdencHevcState::SetSequenceStructs()
2706 {
2707     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2708 
2709     CODECHAL_ENCODE_FUNCTION_ENTER;
2710 
2711     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::SetSequenceStructs());
2712 
2713     switch (m_hevcSeqParams->TargetUsage)
2714     {
2715         case 1: case 2:                         // Quality mode
2716             m_hevcSeqParams->TargetUsage = 1;
2717             break;
2718         case 3: case 4: case 5:                 // Normal mode
2719             m_hevcSeqParams->TargetUsage = 4;
2720             break;
2721         case 6: case 7:                         // Speed mode
2722             m_hevcSeqParams->TargetUsage = 7;
2723             break;
2724         default:
2725             m_hevcSeqParams->TargetUsage = 4;
2726             break;
2727     }
2728 
2729     m_targetUsage = (uint32_t)m_hevcSeqParams->TargetUsage;
2730 
2731     // enable motion adaptive under game streamming scenario for better quality
2732     if (m_hevcSeqParams->ScenarioInfo == ESCENARIO_GAMESTREAMING)
2733     {
2734         m_enableMotionAdaptive = true;
2735     }
2736 
2737     // ACQP is by default disabled, enable it when SSC/QpAdjust required.
2738     if (m_hevcSeqParams->SliceSizeControl == true ||
2739         m_hevcSeqParams->QpAdjustment == true)
2740     {
2741         m_hevcVdencAcqpEnabled = true;
2742     }
2743 
2744     // Get row store cache offset as all the needed information is got here
2745     if (m_vdencInterface->IsRowStoreCachingSupported())
2746     {
2747         MHW_VDBOX_ROWSTORE_PARAMS rowStoreParams;
2748         rowStoreParams.Mode = m_mode;
2749         rowStoreParams.dwPicWidth = m_frameWidth;
2750         rowStoreParams.ucChromaFormat   = m_chromaFormat;
2751         rowStoreParams.ucBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
2752         rowStoreParams.ucLCUSize        = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
2753         // VDEnc only support LCU64 for now
2754         if (rowStoreParams.ucLCUSize != MAX_LCU_SIZE)
2755         {
2756             CODECHAL_ENCODE_ASSERTMESSAGE("HEVC VDEnc only support LCU64 for now.");
2757             return MOS_STATUS_INVALID_PARAMETER;
2758         }
2759         m_hwInterface->SetRowstoreCachingOffsets(&rowStoreParams);
2760     }
2761 
2762 
2763     if (m_hevcSeqParams->VBVBufferSizeInBit < m_hevcSeqParams->InitVBVBufferFullnessInBit)
2764     {
2765         CODECHAL_ENCODE_NORMALMESSAGE(
2766             "VBVBufferSizeInBit is less than InitVBVBufferFullnessInBit, \
2767             min(VBVBufferSizeInBit, InitVBVBufferFullnessInBit) will set to \
2768             hucVdencBrcInitDmem->InitBufFull_U32 and hucVdencBrcUpdateDmem->TARGETSIZE_U32(except Low Delay BRC).\n");
2769     }
2770 
2771     m_lookaheadDepth = m_hevcSeqParams->LookaheadDepth;
2772     m_lookaheadPass  = (m_lookaheadDepth > 0) && (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP);
2773 
2774     if (m_lookaheadPass)
2775     {
2776         if (m_hevcSeqParams->MaxAdaptiveGopPicSize < m_hevcSeqParams->MinAdaptiveGopPicSize)
2777         {
2778             m_hevcSeqParams->MaxAdaptiveGopPicSize = m_hevcSeqParams->MinAdaptiveGopPicSize;
2779         }
2780         else if ((m_hevcSeqParams->MaxAdaptiveGopPicSize > 0) && (m_hevcSeqParams->MinAdaptiveGopPicSize == 0))
2781         {
2782             m_hevcSeqParams->MinAdaptiveGopPicSize = (m_hevcSeqParams->MaxAdaptiveGopPicSize + 1) >> 1;
2783         }
2784 
2785         m_lookaheadAdaptiveI = (m_hevcSeqParams->MaxAdaptiveGopPicSize != m_hevcSeqParams->MinAdaptiveGopPicSize);
2786         if (!m_lookaheadAdaptiveI && (m_hevcSeqParams->MaxAdaptiveGopPicSize == 0))
2787         {
2788             if (m_hevcSeqParams->GopPicSize > 0)
2789             {
2790                 m_hevcSeqParams->MaxAdaptiveGopPicSize = m_hevcSeqParams->GopPicSize;
2791                 m_hevcSeqParams->MinAdaptiveGopPicSize = m_hevcSeqParams->GopPicSize;
2792             }
2793             else
2794             {
2795                 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid GopPicSize in LPLA!");
2796                 return MOS_STATUS_INVALID_PARAMETER;
2797             }
2798         }
2799     }
2800 
2801     if (m_lookaheadDepth > 0)
2802     {
2803         uint64_t targetBitRate = (uint64_t)m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
2804         double frameRate     = (m_hevcSeqParams->FrameRate.Denominator ? (double)m_hevcSeqParams->FrameRate.Numerator / m_hevcSeqParams->FrameRate.Denominator : 30);
2805         if ((frameRate < 1) || (targetBitRate < frameRate) || (targetBitRate > 0xFFFFFFFF))
2806         {
2807             CODECHAL_ENCODE_ASSERTMESSAGE("Invalid FrameRate or TargetBitRate in LPLA!");
2808             return MOS_STATUS_INVALID_PARAMETER;
2809         }
2810 
2811         m_averageFrameSize = (uint32_t)(targetBitRate / frameRate);
2812 
2813         if (m_hevcSeqParams->VBVBufferSizeInBit < m_hevcSeqParams->InitVBVBufferFullnessInBit)
2814         {
2815             CODECHAL_ENCODE_ASSERTMESSAGE("VBVBufferSizeInBit is less than InitVBVBufferFullnessInBit\n");
2816             eStatus = MOS_STATUS_INVALID_PARAMETER;
2817             return eStatus;
2818         }
2819 
2820         if (m_targetBufferFulness == 0 && m_prevTargetFrameSize == 0)
2821         {
2822             m_targetBufferFulness = m_hevcSeqParams->VBVBufferSizeInBit - m_hevcSeqParams->InitVBVBufferFullnessInBit;
2823             if (m_lookaheadPass)
2824             {
2825                 uint32_t initVbvFullnessInFrames = MOS_MIN(m_hevcSeqParams->InitVBVBufferFullnessInBit, m_hevcSeqParams->VBVBufferSizeInBit) / m_averageFrameSize;
2826                 uint32_t vbvBufferSizeInFrames = m_hevcSeqParams->VBVBufferSizeInBit / m_averageFrameSize;
2827                 uint32_t encBufferFullness = (vbvBufferSizeInFrames - initVbvFullnessInFrames) * m_averageFrameSize;
2828                 m_bufferFulnessError = (int32_t)((int64_t)m_targetBufferFulness - (int64_t)encBufferFullness);
2829             }
2830         }
2831     }
2832 
2833     return eStatus;
2834 }
2835 
SetPictureStructs()2836 MOS_STATUS CodechalVdencHevcState::SetPictureStructs()
2837 {
2838     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2839 
2840     CODECHAL_ENCODE_FUNCTION_ENTER;
2841 
2842     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::SetPictureStructs());
2843 
2844     m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx;
2845 
2846     //Enable only for TU1
2847     if (m_hevcSeqParams->TargetUsage != 1)
2848     {
2849         m_hmeEnabled = m_b16XMeEnabled = m_b32XMeEnabled = false;
2850         m_16xMeSupported = false;
2851     }
2852 
2853     // SSC can be satisfied in single VDEnc+PAK pass when required.
2854     // However it is not 100% guaranteed due to delay in HW.
2855     // When it happens, PAK would indicate SSC violation in MMIO register
2856     // and HuC would adjust SSC threshold and triggers another VDEnc+PAK pass.
2857     // SSC requires HuC for all target usages. (allow 1 pass SSC temporarily for testing purpose)
2858     if (m_hevcSeqParams->SliceSizeControl)
2859     {
2860         m_vdencHuCConditional2ndPass = true;
2861     }
2862 
2863     // Weighted Prediction is supported only with VDEnc, only applicable to P/B frames
2864     if (m_hevcPicParams->weighted_pred_flag || m_hevcPicParams->weighted_bipred_flag)
2865     {
2866         // with SAO, needs to increase total number of passes to 3 later (2 for SAO, 1 for WP)
2867         m_hevcVdencWeightedPredEnabled = true;
2868         m_vdencHuCConditional2ndPass = true;
2869 
2870         // Set ACQP enabled if GPU base WP is required.
2871         if(m_hevcPicParams->bEnableGPUWeightedPrediction)
2872         {
2873             m_hevcVdencAcqpEnabled = true;
2874         }
2875     }
2876 
2877     if (m_lookaheadPass)
2878     {
2879         m_vdencHuCConditional2ndPass = m_lookaheadAdaptiveI && m_hevcPicParams->CodingType != I_TYPE;  //conditional 2nd pass for adaptive IDR
2880     }
2881 
2882     if (m_brcEnabled)  // VDEnc BRC supports maximum 2 PAK passes
2883     {
2884         if (m_hevcPicParams->BRCPrecision == 1)  // single-pass BRC, App requirment with first priority
2885         {
2886             m_numPasses = 0;
2887             // There is no need of additional pass for SSC, violation rate could be high but ok
2888         }
2889         else if (m_multipassBrcSupported)   // multi-pass BRC is supported
2890         {
2891             m_numPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES - 1;
2892             m_vdencHuCConditional2ndPass = true;
2893         }
2894         else
2895         {
2896             m_numPasses = 0;
2897         }
2898 
2899         m_vdencBrcEnabled = true;
2900         m_hevcVdencAcqpEnabled = false;  // when BRC is enabled, ACQP has to be turned off
2901     }
2902     else   // CQP, ACQP
2903     {
2904         m_numPasses = 0;
2905 
2906         // ACQP + SSC, ACQP + WP. CQP + SSC/WP donot need 2nd pass
2907         // driver programs 2nd pass, but it will be decided by conditional batch buffer end cmd to execute 2nd pass
2908         if (m_vdencHuCConditional2ndPass && (m_hevcVdencAcqpEnabled || m_lookaheadPass))
2909         {
2910             m_numPasses += 1;
2911         }
2912     }
2913 
2914     CODECHAL_ENCODE_VERBOSEMESSAGE("m_numPasses = %d",m_numPasses);
2915 
2916     m_vdencHucUsed = m_hevcVdencAcqpEnabled || m_vdencBrcEnabled;
2917 
2918     // VDEnc always needs to enable due to pak fractional QP features
2919     // In VDENC mode, this field "Cu_Qp_Delta_Enabled_Flag" should always be set to 1.
2920     CODECHAL_ENCODE_ASSERT(m_hevcPicParams->cu_qp_delta_enabled_flag == 1);
2921 
2922     // Restriction: If RollingI is enabled, ROI needs to be disabled
2923     if (m_hevcPicParams->bEnableRollingIntraRefresh)
2924     {
2925         m_hevcPicParams->NumROI = 0;
2926     }
2927 
2928     //VDEnc StreamIn enabled if case of ROI (All frames), MBQP(LCUQP), DirtyRect and SHME (ldB frames)
2929     m_vdencStreamInEnabled = (m_vdencEnabled) && (m_hevcPicParams->NumROI ||
2930                                                   m_encodeParams.bMbQpDataEnabled ||
2931                                                   (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)) || (m_b16XMeEnabled));
2932 
2933     CODECHAL_ENCODE_CHK_STATUS_RETURN(PrepareVDEncStreamInData());
2934 
2935     if (!m_lookaheadPass)
2936     {
2937         if ((m_lookaheadDepth > 0) && (m_prevTargetFrameSize > 0))
2938         {
2939             int64_t targetBufferFulness = (int64_t)m_targetBufferFulness;
2940             targetBufferFulness += (((int64_t)m_prevTargetFrameSize) << 3) - (int64_t)m_averageFrameSize;
2941             m_targetBufferFulness = targetBufferFulness < 0 ? 0 : (targetBufferFulness > 0xFFFFFFFF ? 0xFFFFFFFF : (uint32_t)targetBufferFulness);
2942         }
2943 
2944         m_prevTargetFrameSize = m_hevcPicParams->TargetFrameSize;
2945     }
2946 
2947     return eStatus;
2948 }
2949 
SetupRegionBoosting(PMOS_RESOURCE vdencStreamIn,uint16_t boostIndex)2950 MOS_STATUS CodechalVdencHevcState::SetupRegionBoosting(PMOS_RESOURCE vdencStreamIn, uint16_t boostIndex)
2951 {
2952     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2953 
2954     CODECHAL_ENCODE_FUNCTION_ENTER;
2955     CODECHAL_ENCODE_CHK_NULL_RETURN(vdencStreamIn);
2956 
2957     MOS_LOCK_PARAMS lockFlags;
2958     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2959     lockFlags.WriteOnly = 1;
2960 
2961     uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(
2962         m_osInterface,
2963         vdencStreamIn,
2964         &lockFlags);
2965     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
2966 
2967     uint32_t streamInWidth = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
2968     uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
2969     int32_t streamInNumCUs = streamInWidth * streamInHeight;
2970 
2971     MOS_ZeroMemory(data, streamInNumCUs * 64);
2972 
2973     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
2974 
2975     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
2976     streaminDataParams.setQpRoiCtrl = true;
2977     uint32_t roiCtrl = 85; // All four 16x16 blocks within the 32x32 blocks share the same region ID 1 (01010101).
2978     for (uint16_t y = 0; y < streamInHeight; y++)
2979     {
2980         if ((y & 7) == boostIndex)
2981         {
2982             for (uint16_t x = 0; x < streamInWidth; x++)
2983             {
2984                 streaminDataParams.roiCtrl = 85;
2985                 SetStreaminDataPerRegion(streamInWidth, y, y+1, x, x+1, &streaminDataParams, data);
2986             }
2987         }
2988     }
2989 
2990     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
2991     streaminDataParams.maxTuSize = 3;    //Maximum TU Size allowed, restriction to be set to 3
2992     streaminDataParams.maxCuSize = 2;    //For ARB, currently support 32x32 block
2993     switch (m_hevcSeqParams->TargetUsage)
2994     {
2995     case 1:
2996     case 4:
2997         streaminDataParams.numMergeCandidateCu64x64 = 4;
2998         streaminDataParams.numMergeCandidateCu32x32 = 3;
2999         streaminDataParams.numMergeCandidateCu16x16 = 2;
3000         streaminDataParams.numMergeCandidateCu8x8   = 1;
3001         streaminDataParams.numImePredictors         = m_imgStateImePredictors;
3002         break;
3003     case 7:
3004         streaminDataParams.numMergeCandidateCu64x64 = 2;
3005         streaminDataParams.numMergeCandidateCu32x32 = 2;
3006         streaminDataParams.numMergeCandidateCu16x16 = 2;
3007         streaminDataParams.numMergeCandidateCu8x8   = 0;
3008         streaminDataParams.numImePredictors         = 4;
3009         break;
3010     }
3011 
3012     for (auto i = 0; i < streamInNumCUs; i++)
3013     {
3014         SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
3015     }
3016 
3017     m_osInterface->pfnUnlockResource(
3018         m_osInterface,
3019         vdencStreamIn);
3020 
3021     return eStatus;
3022 }
3023 
PrepareVDEncStreamInData()3024 MOS_STATUS CodechalVdencHevcState::PrepareVDEncStreamInData()
3025 {
3026     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3027 
3028     CODECHAL_ENCODE_FUNCTION_ENTER;
3029 
3030     if (m_vdencStreamInEnabled && m_encodeParams.bMbQpDataEnabled)
3031     {
3032         if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
3033         {
3034             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupMbQpStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
3035         }
3036         else if (m_vdencHucUsed)
3037         {
3038             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBRCROIStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx], &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]));
3039         }
3040     }
3041 
3042 
3043     if (m_brcAdaptiveRegionBoostSupported && m_hevcPicParams->TargetFrameSize && !m_lookaheadDepth)
3044     {
3045         // Adaptive region boost is enabled for TCBRC only
3046         m_brcAdaptiveRegionBoostEnable = true;
3047         m_vdencStreamInEnabled         = true;
3048     }
3049     else
3050     {
3051         m_brcAdaptiveRegionBoostEnable = false;
3052     }
3053 
3054     if (!m_brcAdaptiveRegionBoostEnable && m_vdencStreamInEnabled && m_hevcPicParams->NumROI)
3055     {
3056         ProcessRoiDeltaQp();
3057 
3058         if (m_vdencHucUsed && !m_vdencNativeROIEnabled)
3059         {
3060             //ForceQp ROI in ACQP, BRC mode only
3061             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBRCROIStreamIn(&m_resVdencStreamInBuffer[m_currRecycledBufIdx], &m_vdencDeltaQpBuffer[m_currRecycledBufIdx]));
3062         }
3063         else
3064         {
3065             //Native ROI
3066             CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROIStreamIn(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx])));
3067         }
3068     }
3069     else if (!m_brcAdaptiveRegionBoostEnable && m_vdencStreamInEnabled && (m_hevcPicParams->NumDirtyRects > 0 && (B_TYPE == m_hevcPicParams->CodingType)))
3070     {
3071         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupDirtyRectStreamIn(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx])));
3072     }
3073 
3074     if (m_brcAdaptiveRegionBoostEnable)
3075     {
3076         uint16_t rowOffset[8] = {0, 3, 5, 2, 7, 4, 1, 6};
3077         uint16_t circularFrameIdx = (m_storeData - 1) & 7;
3078         CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupRegionBoosting(&(m_resVdencStreamInBuffer[m_currRecycledBufIdx]), rowOffset[circularFrameIdx]));
3079     }
3080 
3081     return eStatus;
3082 }
3083 
CalcScaledDimensions()3084 MOS_STATUS CodechalVdencHevcState::CalcScaledDimensions()
3085 {
3086     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3087 
3088     CODECHAL_ENCODE_FUNCTION_ENTER;
3089 
3090     // HME Scaling WxH
3091     m_downscaledWidthInMb4x =
3092         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_4x);
3093     m_downscaledHeightInMb4x =
3094         CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_4x);
3095     m_downscaledWidth4x =
3096         m_downscaledWidthInMb4x * CODECHAL_MACROBLOCK_WIDTH;
3097     m_downscaledHeight4x =
3098         m_downscaledHeightInMb4x * CODECHAL_MACROBLOCK_HEIGHT;
3099 
3100     // SuperHME Scaling WxH
3101     m_downscaledWidthInMb16x =
3102         CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / SCALE_FACTOR_16x);
3103     m_downscaledHeightInMb16x =
3104         CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight / SCALE_FACTOR_16x);
3105     m_downscaledWidth16x =
3106         m_downscaledWidthInMb16x * CODECHAL_MACROBLOCK_WIDTH;
3107     m_downscaledHeight16x =
3108         m_downscaledHeightInMb16x * CODECHAL_MACROBLOCK_HEIGHT;
3109 
3110     return eStatus;
3111 }
3112 
ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)3113 MOS_STATUS CodechalVdencHevcState::ValidateRefFrameData(PCODEC_HEVC_ENCODE_SLICE_PARAMS slcParams)
3114 {
3115     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3116 
3117     CODECHAL_ENCODE_CHK_NULL_RETURN(slcParams);
3118 
3119     uint8_t maxNumRef0 = m_numMaxVdencL0Ref;
3120     uint8_t maxNumRef1 = m_numMaxVdencL1Ref;
3121 
3122     if (slcParams->num_ref_idx_l0_active_minus1 > maxNumRef0 - 1)
3123     {
3124         CODECHAL_ENCODE_ASSERT(false);
3125         slcParams->num_ref_idx_l0_active_minus1 = maxNumRef0 - 1;
3126     }
3127 
3128     if (slcParams->num_ref_idx_l1_active_minus1 > maxNumRef1 - 1)
3129     {
3130         CODECHAL_ENCODE_ASSERT(false);
3131         slcParams->num_ref_idx_l1_active_minus1 = maxNumRef1 - 1;
3132     }
3133 
3134     // For HEVC VDEnc, L0 and L1 must contain the same (number of) elements. If not, the input slc param is not good for VDEnc.
3135     if (slcParams->num_ref_idx_l0_active_minus1 != slcParams->num_ref_idx_l1_active_minus1)
3136     {
3137         CODECHAL_ENCODE_ASSERT(false);
3138         slcParams->num_ref_idx_l1_active_minus1 = slcParams->num_ref_idx_l0_active_minus1;
3139     }
3140 
3141     for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++)
3142     {
3143         if (slcParams->RefPicList[0][j].PicEntry != slcParams->RefPicList[1][j].PicEntry)
3144         {
3145             CODECHAL_ENCODE_ASSERT(false);
3146             eStatus = MOS_STATUS_INVALID_PARAMETER;
3147             return eStatus;
3148         }
3149     }
3150 
3151     return eStatus;
3152 }
3153 
InitializePicture(const EncoderParams & params)3154 MOS_STATUS CodechalVdencHevcState::InitializePicture(const EncoderParams& params)
3155 {
3156     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3157 
3158     CODECHAL_ENCODE_FUNCTION_ENTER;
3159 
3160     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::InitializePicture(params));
3161 
3162     m_resVdencStatsBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, vdencStats);
3163     m_resPakStatsBuffer = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, pakStats);
3164     m_resSliceCountBuffer = &m_sliceCountBuffer;
3165     m_resVdencModeTimerBuffer = &m_vdencModeTimerBuffer;
3166 
3167     return eStatus;
3168 }
3169 
UserFeatureKeyReport()3170 MOS_STATUS CodechalVdencHevcState::UserFeatureKeyReport()
3171 {
3172     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3173 
3174     CODECHAL_ENCODE_FUNCTION_ENTER;
3175 
3176     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::UserFeatureKeyReport());
3177 
3178 #if (_DEBUG || _RELEASE_INTERNAL)
3179     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_VDENC_IN_USE_ID, m_vdencEnabled, m_osInterface->pOsContext);
3180     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID, m_hevcVdencAcqpEnabled, m_osInterface->pOsContext);
3181 #endif
3182 
3183     return eStatus;
3184 }
3185 
GetStatusReport(EncodeStatus * encodeStatus,EncodeStatusReport * encodeStatusReport)3186 MOS_STATUS CodechalVdencHevcState::GetStatusReport(
3187     EncodeStatus *encodeStatus,
3188     EncodeStatusReport *encodeStatusReport)
3189 {
3190     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3191 
3192     CODECHAL_ENCODE_FUNCTION_ENTER;
3193 
3194     // common initilization
3195     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport));
3196 
3197     MOS_LOCK_PARAMS lockFlags;
3198     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
3199     lockFlags.ReadOnly = 1;
3200 
3201     uint32_t* sliceSize = nullptr;
3202     // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
3203     if (encodeStatus->sliceReport.pSliceSize)
3204     {
3205         sliceSize = (uint32_t*)m_osInterface->pfnLockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize, &lockFlags);
3206         CODECHAL_ENCODE_CHK_NULL_RETURN(sliceSize);
3207 
3208         encodeStatusReport->NumberSlices            = encodeStatus->sliceReport.NumberSlices;
3209         encodeStatusReport->SizeOfSliceSizesBuffer  = sizeof(uint16_t) * encodeStatus->sliceReport.NumberSlices;
3210         encodeStatusReport->SliceSizeOverflow       = (encodeStatus->sliceReport.SliceSizeOverflow >> 16) & 1;
3211         encodeStatusReport->pSliceSizes             = (uint16_t*)sliceSize;
3212 
3213         uint16_t prevCumulativeSliceSize = 0;
3214         // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
3215         for (auto sliceCount = 0; sliceCount < encodeStatus->sliceReport.NumberSlices; sliceCount++)
3216         {
3217             // PAK output the sliceSize at 16DW intervals.
3218             CODECHAL_ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
3219             uint32_t CurrAccumulatedSliceSize           = sliceSize[sliceCount * 16];
3220 
3221             //convert cummulative slice size to individual, first slice may have PPS/SPS,
3222             encodeStatusReport->pSliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
3223             prevCumulativeSliceSize += encodeStatusReport->pSliceSizes[sliceCount];
3224         }
3225         m_osInterface->pfnUnlockResource(m_osInterface, encodeStatus->sliceReport.pSliceSize);
3226     }
3227 
3228     if (m_lookaheadPass && m_lookaheadReport && (encodeStatus->lookaheadStatus.targetFrameSize > 0))
3229     {
3230         encodeStatusReport->pLookaheadStatus = &encodeStatus->lookaheadStatus;
3231         encodeStatus->lookaheadStatus.isValid = 1;
3232         uint64_t targetFrameSize = (uint64_t)encodeStatus->lookaheadStatus.targetFrameSize * m_averageFrameSize;
3233         encodeStatus->lookaheadStatus.targetFrameSize = (uint32_t)((targetFrameSize + (32*8)) / (64*8)); // Convert bits to bytes. 64 is normalized average frame size used in lookahead analysis kernel
3234         uint64_t targetBufferFulness = (uint64_t)encodeStatus->lookaheadStatus.targetBufferFulness * m_averageFrameSize;
3235         encodeStatus->lookaheadStatus.targetBufferFulness = (uint32_t)((targetBufferFulness + 32) / 64); // 64 is normalized average frame size used in lookahead analysis kernel
3236         // Apply rounding error to targetFrameSize to align target buffer fullness between lookahead pass and encode pass
3237         if (m_prevTargetFrameSize > 0)
3238         {
3239             int64_t encTargetBufferFulness = (int64_t)m_targetBufferFulness;
3240             encTargetBufferFulness += (((int64_t)m_prevTargetFrameSize) << 3) - (int64_t)m_averageFrameSize;
3241             m_targetBufferFulness = encTargetBufferFulness < 0 ?
3242                 0 : (encTargetBufferFulness > 0xFFFFFFFF ? 0xFFFFFFFF : (uint32_t)encTargetBufferFulness);
3243             int32_t deltaBits = (int32_t)((int64_t)(encodeStatus->lookaheadStatus.targetBufferFulness) + m_bufferFulnessError - (int64_t)(m_targetBufferFulness));
3244             deltaBits /= 64;
3245             if (deltaBits > 8)
3246             {
3247                 if ((uint32_t)deltaBits > encodeStatus->lookaheadStatus.targetFrameSize)
3248                 {
3249                     deltaBits = (int32_t)(encodeStatus->lookaheadStatus.targetFrameSize);
3250                 }
3251                 encodeStatus->lookaheadStatus.targetFrameSize += (uint32_t)(deltaBits >> 3);
3252             }
3253             else if (deltaBits < -8)
3254             {
3255                 if ((-deltaBits) > (int32_t)(encodeStatus->lookaheadStatus.targetFrameSize))
3256                 {
3257                     deltaBits = -(int32_t)(encodeStatus->lookaheadStatus.targetFrameSize);
3258                 }
3259                 encodeStatus->lookaheadStatus.targetFrameSize -= (uint32_t)((-deltaBits) >> 3);
3260             }
3261         }
3262         m_prevTargetFrameSize = encodeStatus->lookaheadStatus.targetFrameSize;
3263 
3264         if (encodeStatus->lookaheadStatus.cqmHint > 4)
3265         {
3266             // Currently only 0x00 and 0x01 are valid. Report invalid (0xFF) for other values.
3267             encodeStatus->lookaheadStatus.cqmHint = 0xFF;
3268         }
3269 
3270         // ensure CQM hint never exceed the number of PPS
3271         if (encodeStatus->lookaheadStatus.cqmHint <= 2)
3272         {
3273             encodeStatus->lookaheadStatus.cqmHint = 0;
3274         }
3275         else if (encodeStatus->lookaheadStatus.cqmHint <= 4)
3276         {
3277             encodeStatus->lookaheadStatus.cqmHint = 1;
3278         }
3279 
3280         if (encodeStatus->lookaheadStatus.pyramidDeltaQP == 0)
3281         {
3282             encodeStatus->lookaheadStatus.miniGopSize = 1;
3283         }
3284         else if (m_hevcSeqParams->GopRefDist == 1) // LPLA only supports P pyramid for this condition
3285         {
3286             encodeStatus->lookaheadStatus.miniGopSize = 4;
3287         }
3288         else
3289         {
3290             encodeStatus->lookaheadStatus.miniGopSize = m_hevcSeqParams->GopRefDist;
3291         }
3292     }
3293     else
3294     {
3295         encodeStatusReport->pLookaheadStatus = nullptr;
3296         encodeStatus->lookaheadStatus.isValid = 0;
3297         encodeStatus->lookaheadStatus.cqmHint = 0xFF;
3298         encodeStatus->lookaheadStatus.targetFrameSize = 0;
3299         encodeStatus->lookaheadStatus.targetBufferFulness = 0;
3300     }
3301 
3302     return eStatus;
3303 }
3304 
AllocatePakResources()3305 MOS_STATUS CodechalVdencHevcState::AllocatePakResources()
3306 {
3307     CODECHAL_ENCODE_FUNCTION_ENTER;
3308 
3309     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3310 
3311     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::AllocatePakResources());
3312 
3313     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3314     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3315     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3316     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3317     allocParamsForBufferLinear.Format = Format_Buffer;
3318 
3319     // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP PipeBufAddr command
3320     uint32_t size                       = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * m_maxTileNumber, CODECHAL_PAGE_SIZE);  //Each tile has 8 cache size bytes of data, Align to page is HuC requirement
3321     allocParamsForBufferLinear.dwBytes = size;
3322     allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";
3323 
3324     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3325                                                   m_osInterface,
3326                                                   &allocParamsForBufferLinear,
3327                                                   &m_resFrameStatStreamOutBuffer),
3328         "Failed to create VDENC FrameStatStreamOutBuffer Buffer");
3329 
3330     // PAK Statistics buffer
3331     size = MOS_ALIGN_CEIL(m_vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE);
3332     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3333         m_standard, size, 1, pakStats, "pakStats"));
3334 
3335     // Slice Count buffer 1 DW = 4 Bytes
3336     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3337     allocParamsForBufferLinear.pBufName = "Slice Count Buffer";
3338 
3339     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3340         m_osInterface,
3341         &allocParamsForBufferLinear,
3342         &m_sliceCountBuffer),
3343         "Failed to create VDENC Slice Count Buffer");
3344 
3345     // VDEncMode Timer buffer 1 DW = 4 Bytes
3346     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
3347     allocParamsForBufferLinear.pBufName = "VDEncMode Timer Buffer";
3348 
3349     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3350         m_osInterface,
3351         &allocParamsForBufferLinear,
3352         &m_vdencModeTimerBuffer),
3353         "Failed to create VDEncMode Timer Buffer");
3354 
3355     return eStatus;
3356 }
3357 
FreePakResources()3358 MOS_STATUS CodechalVdencHevcState::FreePakResources()
3359 {
3360     CODECHAL_ENCODE_FUNCTION_ENTER;
3361 
3362     m_osInterface->pfnFreeResource(m_osInterface, &m_resFrameStatStreamOutBuffer);
3363     m_osInterface->pfnFreeResource(m_osInterface, &m_sliceCountBuffer);
3364     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencModeTimerBuffer);
3365 
3366     for (uint32_t i = 0; i < CODECHAL_ENCODE_STATUS_NUM; i++)
3367     {
3368         if (!Mos_ResourceIsNull(&m_resSliceReport[i]))
3369         {
3370             m_osInterface->pfnFreeResource(m_osInterface, &m_resSliceReport[i]);
3371         }
3372     }
3373 
3374     if (m_swLaMode != nullptr)
3375     {
3376         m_osInterface->pfnFreeLibrary(m_swLaMode);
3377         m_swLaMode = nullptr;
3378     }
3379 
3380     return CodechalEncodeHevcBase::FreePakResources();
3381 }
3382 
AllocateEncResources()3383 MOS_STATUS CodechalVdencHevcState::AllocateEncResources()
3384 {
3385     CODECHAL_ENCODE_FUNCTION_ENTER;
3386 
3387     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3388 
3389     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3390     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3391     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3392     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3393     allocParamsForBufferLinear.Format = Format_Buffer;
3394 
3395     // PAK stream-out buffer
3396     allocParamsForBufferLinear.dwBytes = CODECHAL_HEVC_PAK_STREAMOUT_SIZE;
3397     allocParamsForBufferLinear.pBufName = "Pak StreamOut Buffer";
3398     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3399         m_osInterface,
3400         &allocParamsForBufferLinear,
3401         &m_resStreamOutBuffer[0]),
3402         "Failed to allocate Pak Stream Out Buffer.");
3403 
3404     // VDENC Intra Row Store Scratch buffer
3405     // 1 cacheline per MB
3406     uint32_t size = m_picWidthInMb * CODECHAL_CACHELINE_SIZE;
3407     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3408         m_standard, size, 1, vdencIntraRowStoreScratch, "vdencIntraRowStoreScratch"));
3409 
3410     // VDENC Statistics buffer, only needed for BRC
3411     // The size is 19 CL for each tile, allocated with worst case, optimize later
3412     size = MOS_ALIGN_CEIL(m_vdencBrcStatsBufferSize * m_maxTileNumber, CODECHAL_PAGE_SIZE);
3413     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3414         m_standard, size, 1, vdencStats, "vdencStats"));
3415 
3416     if (m_hucCmdInitializer)
3417     {
3418         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucCmdInitializer->CmdInitializerAllocateResources(m_hwInterface));
3419     }
3420 
3421     return eStatus;
3422 }
3423 
FreeEncResources()3424 MOS_STATUS CodechalVdencHevcState::FreeEncResources()
3425 {
3426     CODECHAL_ENCODE_FUNCTION_ENTER;
3427 
3428     // PAK stream-out buffer de-allocated inside CodecHalEncodeReleaseResources()
3429 
3430     if (m_hucCmdInitializer)
3431     {
3432         m_hucCmdInitializer->CmdInitializerFreeResources();
3433     }
3434     MOS_Delete(m_hucCmdInitializer);
3435 
3436     return MOS_STATUS_SUCCESS;
3437 }
3438 
AllocateBrcResources()3439 MOS_STATUS CodechalVdencHevcState::AllocateBrcResources()
3440 {
3441     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3442 
3443     CODECHAL_ENCODE_FUNCTION_ENTER;
3444 
3445     // initiate allocation paramters and lock flags
3446     MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
3447     MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
3448     allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
3449     allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
3450     allocParamsForBufferLinear.Format = Format_Buffer;
3451 
3452     allocParamsForBufferLinear.dwBytes  = m_hevcBrcPakStatisticsSize;
3453     allocParamsForBufferLinear.pBufName = "BRC PAK Statistics Buffer";
3454 
3455     MOS_LOCK_PARAMS lockFlagsWriteOnly;
3456     MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
3457     lockFlagsWriteOnly.WriteOnly = true;
3458 
3459     uint8_t *data = nullptr;
3460     for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
3461     {
3462         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3463             m_osInterface,
3464             &allocParamsForBufferLinear,
3465             &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]),
3466             "Failed to allocate BRC PAK Statistics Buffer.");
3467 
3468         CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3469             m_osInterface,
3470             &(m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]),
3471             &lockFlagsWriteOnly));
3472 
3473         MOS_ZeroMemory(data, m_hevcBrcPakStatisticsSize);
3474         m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]);
3475     }
3476 
3477     // PAK Info buffer
3478     uint32_t size = MOS_ALIGN_CEIL(sizeof(CodechalVdencHevcPakInfo), CODECHAL_PAGE_SIZE);
3479     CODECHAL_ENCODE_CHK_NULL_RETURN(m_allocator->AllocateResource(
3480         m_standard, size, 1, pakInfo, "pakInfo"));
3481 
3482     // HuC FW Region 6: Data Buffer of Current Picture
3483     // Data (1024 bytes) for current
3484     // Data (1024 bytes) for ref0
3485     // Data (1024 bytes) for ref1
3486     // Data (1024 bytes) for ref2
3487     allocParamsForBufferLinear.dwBytes = CODECHAL_PAGE_SIZE * 4;
3488     allocParamsForBufferLinear.pBufName = "Data from Pictures Buffer for Weighted Prediction";
3489 
3490     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3491         m_osInterface,
3492         &allocParamsForBufferLinear,
3493         &m_dataFromPicsBuffer),
3494         "Failed to create Data from Pictures Buffer for Weighted Prediction");
3495 
3496     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
3497     {
3498         // Delta QP for ROI Buffer
3499         // 1 byte for each 32x32 block, maximum region size is 8192 bytes for 4K/2K resolution, currently the allocation size is fixed
3500         allocParamsForBufferLinear.dwBytes = m_deltaQpRoiBufferSize;
3501         allocParamsForBufferLinear.pBufName = "Delta QP for ROI Buffer";
3502 
3503         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3504             m_osInterface,
3505             &allocParamsForBufferLinear,
3506             &m_vdencDeltaQpBuffer[k]),
3507             "Failed to create Delta QP for ROI Buffer");
3508 
3509         // BRC update DMEM
3510         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3511         allocParamsForBufferLinear.pBufName = "VDENC BrcUpdate DmemBuffer";
3512 
3513         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES_FOR_TILE_REPLAY; i++)
3514         {
3515             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3516                 m_osInterface,
3517                 &allocParamsForBufferLinear,
3518                 &m_vdencBrcUpdateDmemBuffer[k][i]),
3519                 "Failed to create VDENC BrcUpdate DmemBuffer");
3520 
3521             CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3522                 m_osInterface,
3523                 &m_vdencBrcUpdateDmemBuffer[k][i],
3524                 &lockFlagsWriteOnly));
3525 
3526             MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
3527             m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[k][i]);
3528         }
3529 
3530         // BRC init/reset DMEM
3531         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3532         allocParamsForBufferLinear.pBufName = "VDENC BrcInit DmemBuffer";
3533 
3534         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3535             m_osInterface,
3536             &allocParamsForBufferLinear,
3537             &m_vdencBrcInitDmemBuffer[k]),
3538             "Failed to create VDENC BrcInit DmemBuffer");
3539 
3540         CODECHAL_ENCODE_CHK_NULL_RETURN(data = (uint8_t *)m_osInterface->pfnLockResource(
3541             m_osInterface,
3542             &m_vdencBrcInitDmemBuffer[k],
3543             &lockFlagsWriteOnly));
3544 
3545         MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
3546         m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencBrcInitDmemBuffer[k]);
3547 
3548         // Const Data buffer
3549         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencBrcConstDataBufferSize, CODECHAL_PAGE_SIZE);
3550         allocParamsForBufferLinear.pBufName = "VDENC BRC Const Data Buffer";
3551 
3552         CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3553             m_osInterface,
3554             &allocParamsForBufferLinear,
3555             &m_vdencBrcConstDataBuffer[k]),
3556             "Failed to create VDENC BRC Const Data Buffer");
3557 
3558         // VDEnc read batch buffer (input for HuC FW)
3559         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_hwInterface->m_vdencReadBatchBufferSize, CODECHAL_PAGE_SIZE);
3560         allocParamsForBufferLinear.pBufName = "VDENC Read Batch Buffer";
3561 
3562         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
3563         {
3564             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3565                 m_osInterface,
3566                 &allocParamsForBufferLinear,
3567                 &m_vdencReadBatchBuffer[k][i]),
3568                 "Failed to allocate VDENC Read Batch Buffer");
3569         }
3570 
3571         // Lookahead Update DMEM
3572         allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencLaUpdateDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3573         allocParamsForBufferLinear.pBufName = "VDENC Lookahead update Dmem Buffer";
3574 
3575         for (auto i = 0; i < CODECHAL_LPLA_NUM_OF_PASSES; i++)
3576         {
3577             CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3578                 m_osInterface,
3579                 &allocParamsForBufferLinear,
3580                 &m_vdencLaUpdateDmemBuffer[k][i]),
3581                 "Failed to create VDENC Lookahead Update Dmem Buffer");
3582         }
3583     }
3584 
3585     for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
3586     {
3587         // VDENC uses second level batch buffer
3588         MOS_ZeroMemory(&m_vdenc2ndLevelBatchBuffer[j], sizeof(MHW_BATCH_BUFFER));
3589         m_vdenc2ndLevelBatchBuffer[j].bSecondLevel = true;
3590         CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
3591             m_osInterface,
3592             &m_vdenc2ndLevelBatchBuffer[j],
3593             nullptr,
3594             m_hwInterface->m_vdenc2ndLevelBatchBufferSize));
3595     }
3596 
3597     // BRC history buffer
3598     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcHistoryBufSize, CODECHAL_PAGE_SIZE);
3599     allocParamsForBufferLinear.pBufName = "VDENC BRC History Buffer";
3600 
3601     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3602         m_osInterface,
3603         &allocParamsForBufferLinear,
3604         &m_vdencBrcHistoryBuffer),
3605         "Failed to create VDENC BRC History Buffer");
3606 
3607     // Lookahead Init DMEM
3608     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_vdencLaInitDmemBufferSize, CODECHAL_CACHELINE_SIZE);
3609     allocParamsForBufferLinear.pBufName = "VDENC Lookahead Init DmemBuffer";
3610 
3611     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3612         m_osInterface,
3613         &allocParamsForBufferLinear,
3614         &m_vdencLaInitDmemBuffer),
3615         "Failed to create VDENC Lookahead Init DmemBuffer");
3616 
3617     // Lookahead history buffer
3618     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_LaHistoryBufSize, CODECHAL_PAGE_SIZE);
3619     allocParamsForBufferLinear.pBufName = "VDENC Lookahead History Buffer";
3620 
3621     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3622         m_osInterface,
3623         &allocParamsForBufferLinear,
3624         &m_vdencLaHistoryBuffer),
3625         "Failed to create VDENC Lookahead History Buffer");
3626 
3627     // Debug buffer
3628     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcDebugBufSize, CODECHAL_PAGE_SIZE);
3629     allocParamsForBufferLinear.pBufName = "VDENC BRC Debug Buffer";
3630 
3631     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3632         m_osInterface,
3633         &allocParamsForBufferLinear,
3634         &m_vdencBrcDbgBuffer),
3635         "Failed to create VDENC BRC Debug Buffer");
3636 
3637     // Output ROI Streamin Buffer
3638     // 16 DWORDs (VDENC_HEVC_VP9_STREAMIN_STATE) for each 32x32 block, maximum region size is 65536 bytes for 8K/8K resolution, currently the allocation size is fixed
3639     allocParamsForBufferLinear.dwBytes = m_roiStreamInBufferSize;
3640     allocParamsForBufferLinear.pBufName = "Output ROI Streamin Buffer";
3641 
3642     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
3643         m_osInterface,
3644         &allocParamsForBufferLinear,
3645         &m_vdencOutputROIStreaminBuffer));
3646 
3647     // Buffer to store VDEnc frame statistics for lookahead BRC
3648     allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_brcLooaheadStatsBufferSize, CODECHAL_PAGE_SIZE);
3649     allocParamsForBufferLinear.pBufName = "VDENC Lookahead Statistics Buffer";
3650 
3651     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3652         m_osInterface,
3653         &allocParamsForBufferLinear,
3654         &m_vdencLaStatsBuffer),
3655         "Failed to create VDENC Lookahead Statistics Buffer");
3656 
3657     CodechalVdencHevcLaStats *lookaheadInfo = (CodechalVdencHevcLaStats *)m_osInterface->pfnLockResource(
3658         m_osInterface,
3659         &m_vdencLaStatsBuffer,
3660         &lockFlagsWriteOnly);
3661     CODECHAL_ENCODE_CHK_NULL_RETURN(lookaheadInfo);
3662     MOS_ZeroMemory(lookaheadInfo, allocParamsForBufferLinear.dwBytes);
3663     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaStatsBuffer);
3664 
3665     // Buffer to store lookahead output
3666     allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(m_brcLooaheadDataBufferSize, CODECHAL_PAGE_SIZE);
3667     allocParamsForBufferLinear.pBufName = "VDENC Lookahead Data Buffer";
3668 
3669     CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
3670         m_osInterface,
3671         &allocParamsForBufferLinear,
3672         &m_vdencLaDataBuffer),
3673         "Failed to create VDENC Lookahead Data Buffer");
3674 
3675     CodechalVdencHevcLaData *lookaheadData = (CodechalVdencHevcLaData *)m_osInterface->pfnLockResource(
3676         m_osInterface,
3677         &m_vdencLaDataBuffer,
3678         &lockFlagsWriteOnly);
3679     CODECHAL_ENCODE_CHK_NULL_RETURN(lookaheadData);
3680     MOS_ZeroMemory(lookaheadData, allocParamsForBufferLinear.dwBytes);
3681     m_osInterface->pfnUnlockResource(m_osInterface, &m_vdencLaDataBuffer);
3682 
3683     return eStatus;
3684 }
3685 
FreeBrcResources()3686 MOS_STATUS CodechalVdencHevcState::FreeBrcResources()
3687 {
3688     CODECHAL_ENCODE_FUNCTION_ENTER;
3689 
3690     if (m_swBrcMode != nullptr)
3691     {
3692         m_osInterface->pfnFreeLibrary(m_swBrcMode);
3693         m_swBrcMode = nullptr;
3694     }
3695 
3696     for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
3697     {
3698         m_osInterface->pfnFreeResource(
3699             m_osInterface,
3700             &m_vdencBrcBuffers.resBrcPakStatisticBuffer[i]);
3701     }
3702 
3703     m_osInterface->pfnFreeResource(m_osInterface, &m_dataFromPicsBuffer);
3704 
3705     for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
3706     {
3707         m_osInterface->pfnFreeResource(m_osInterface, &m_vdencDeltaQpBuffer[k]);
3708 
3709         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES; i++)
3710         {
3711             m_osInterface->pfnFreeResource(m_osInterface, &m_vdencReadBatchBuffer[k][i]);
3712         }
3713 
3714         for (auto i = 0; i < CODECHAL_VDENC_BRC_NUM_OF_PASSES_FOR_TILE_REPLAY; i++)
3715         {
3716             m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcUpdateDmemBuffer[k][i]);
3717         }
3718 
3719         m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcInitDmemBuffer[k]);
3720         m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcConstDataBuffer[k]);
3721 
3722         for (auto i = 0; i < CODECHAL_LPLA_NUM_OF_PASSES; i++)
3723         {
3724             m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaUpdateDmemBuffer[k][i]);
3725         }
3726     }
3727 
3728     for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++)
3729     {
3730         Mhw_FreeBb(m_osInterface, &m_vdenc2ndLevelBatchBuffer[j], nullptr);
3731     }
3732 
3733     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcHistoryBuffer);
3734     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencBrcDbgBuffer);
3735     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencOutputROIStreaminBuffer);
3736     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaStatsBuffer);
3737     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaDataBuffer);
3738     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaInitDmemBuffer);
3739     m_osInterface->pfnFreeResource(m_osInterface, &m_vdencLaHistoryBuffer);
3740 
3741     return MOS_STATUS_SUCCESS;
3742 }
3743 
Initialize(CodechalSetting * settings)3744 MOS_STATUS CodechalVdencHevcState::Initialize(CodechalSetting * settings)
3745 {
3746     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3747 
3748     CODECHAL_ENCODE_FUNCTION_ENTER;
3749 
3750     // common initilization
3751     CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeHevcBase::Initialize(settings));
3752 
3753     m_vdencBrcBuffers.uiCurrBrcPakStasIdxForRead = 0;
3754     //Reading buffer is with 2 frames late for BRC kernel uses the PAK statstic info of the frame before the previous frame
3755     m_vdencBrcBuffers.uiCurrBrcPakStasIdxForWrite =
3756         (m_vdencBrcBuffers.uiCurrBrcPakStasIdxForRead + 2) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM;
3757 
3758     uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
3759     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencStateCommandsDataSize(
3760         CODECHAL_ENCODE_MODE_HEVC,
3761         &vdencPictureStatesSize,
3762         &vdencPicturePatchListSize));
3763 
3764     //the following code used to calculate ulMBCodeSize:
3765     //pakObjCmdStreamOutDataSize = 2*BYTES_PER_DWORD*(numOfLcu*NUM_PAK_DWS_PER_LCU + numOfLcu*maxNumOfCUperLCU*NUM_DWS_PER_CU); // Multiply by 2 for sideband
3766     //const uint32_t maxNumOfCUperLCU = (64/8)*(64/8);
3767     // NUM_PAK_DWS_PER_LCU 5
3768     // NUM_DWS_PER_CU 8
3769     uint32_t numOfLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, MAX_LCU_SIZE) * MOS_ROUNDUP_DIVIDE(m_frameHeight, MAX_LCU_SIZE);
3770     m_mbCodeSize = MOS_ALIGN_CEIL(2 * sizeof(uint32_t) * numOfLCU * (5 + 64 * 8), CODECHAL_PAGE_SIZE);
3771 
3772     m_defaultPictureStatesSize += vdencPictureStatesSize;
3773     m_defaultPicturePatchListSize += vdencPicturePatchListSize;
3774     m_extraPictureStatesSize += m_hwInterface->m_hucCommandBufferSize;  // For slice size reporting, add the HuC copy commands
3775 
3776     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
3777     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3778     MOS_UserFeature_ReadValue_ID(
3779         nullptr,
3780         __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
3781         &userFeatureData,
3782         m_osInterface->pOsContext);
3783     m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
3784 
3785     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3786     MOS_UserFeature_ReadValue_ID(
3787         nullptr,
3788         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
3789         &userFeatureData,
3790         m_osInterface->pOsContext);
3791     m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false;
3792 
3793     // Multi-Pass BRC
3794     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3795     MOS_UserFeature_ReadValue_ID(
3796         nullptr,
3797         __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_MULTIPASS_BRC_ENABLE_ID,
3798         &userFeatureData,
3799         m_osInterface->pOsContext);
3800     m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
3801 
3802     if (m_codecFunction != CODECHAL_FUNCTION_PAK)
3803     {
3804         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3805         userFeatureData.i32Data = 1;
3806         userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
3807         MOS_UserFeature_ReadValue_ID(
3808             nullptr,
3809             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
3810             &userFeatureData,
3811             m_osInterface->pOsContext);
3812         m_hmeSupported = (userFeatureData.i32Data) ? true : false;
3813 
3814         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3815         userFeatureData.i32Data = 1;
3816         userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
3817         MOS_UserFeature_ReadValue_ID(
3818             nullptr,
3819             __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
3820             &userFeatureData,
3821             m_osInterface->pOsContext);
3822         m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3823     }
3824 
3825     if (m_codecFunction == CODECHAL_FUNCTION_ENC_VDENC_PAK)
3826     {
3827         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3828         MOS_UserFeature_ReadValue_ID(
3829             nullptr,
3830             __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ACQP_ENABLE_ID,
3831             &userFeatureData,
3832             m_osInterface->pOsContext);
3833         m_hevcVdencAcqpEnabled = userFeatureData.i32Data ? true : false;
3834 
3835         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3836         MOS_UserFeature_ReadValue_ID(
3837             nullptr,
3838             __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_VQI_ENABLE_ID,
3839             &userFeatureData,
3840             m_osInterface->pOsContext);
3841         m_hevcVisualQualityImprovement = userFeatureData.i32Data ? true : false;
3842 
3843         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3844         MOS_UserFeature_ReadValue_ID(
3845             nullptr,
3846             __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_ROUNDING_ENABLE_ID,
3847             &userFeatureData,
3848             m_osInterface->pOsContext);
3849         m_hevcVdencRoundingEnabled = userFeatureData.i32Data ? true : false;
3850 
3851         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3852         MOS_UserFeature_ReadValue_ID(
3853             nullptr,
3854             __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_PAKOBJCMD_STREAMOUT_ENABLE_ID,
3855             &userFeatureData,
3856             m_osInterface->pOsContext);
3857         m_vdencPakObjCmdStreamOutEnabled = userFeatureData.i32Data ? true : false;
3858 
3859 #if (_DEBUG || _RELEASE_INTERNAL)
3860         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3861         MOS_UserFeature_ReadValue_ID(
3862             nullptr,
3863             __MEDIA_USER_FEATURE_VALUE_ENCODE_CQM_QP_THRESHOLD_ID,
3864             &userFeatureData,
3865             m_osInterface->pOsContext);
3866         m_cqmQpThreshold = (uint8_t)userFeatureData.u32Data;
3867 #endif
3868     }
3869 
3870     m_minScaledDimension = CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE;
3871     m_minScaledDimensionInMb = (CODECHAL_ENCODE_MIN_SCALED_SURFACE_SIZE + 15) >> 4;
3872 
3873     if (m_frameWidth < 128 || m_frameHeight < 128)
3874     {
3875         m_16xMeSupported = false;
3876         m_32xMeSupported = false;
3877     }
3878 
3879     else if (m_frameWidth < 512 || m_frameHeight < 512)
3880     {
3881         m_16xMeSupported = true;
3882         m_32xMeSupported = false;
3883     }
3884 
3885     else
3886     {
3887         m_16xMeSupported = true;
3888         m_32xMeSupported = true;
3889     }
3890 
3891     if (m_16xMeSupported)
3892     {
3893         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3894         MOS_UserFeature_ReadValue_ID(
3895             nullptr,
3896             __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_16xME_ENABLE_ID,
3897             &userFeatureData,
3898             m_osInterface->pOsContext);
3899         m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
3900     }
3901 
3902     if (m_32xMeSupported)
3903     {
3904         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3905         MOS_UserFeature_ReadValue_ID(
3906             nullptr,
3907             __MEDIA_USER_FEATURE_VALUE_HEVC_VDENC_32xME_ENABLE_ID,
3908             &userFeatureData,
3909             m_osInterface->pOsContext);
3910         m_32xMeSupported = (userFeatureData.i32Data) ? true : false;
3911     }
3912 
3913     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3914     MOS_UserFeature_ReadValue_ID(
3915         nullptr,
3916         __MEDIA_USER_FEATURE_VALUE_ENCODE_LA_SOFTWARE_ID,
3917         &userFeatureData,
3918         m_osInterface->pOsContext);
3919 
3920     if (userFeatureData.i32Data)
3921     {
3922         MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
3923         char path_buffer[256];
3924         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
3925         MOS_ZeroMemory(path_buffer, 256);
3926         userFeatureData.StringData.pStringData = path_buffer;
3927 
3928         statusKey = MOS_UserFeature_ReadValue_ID(
3929             nullptr,
3930             __MEDIA_USER_FEATURE_VALUE_ENCODE_LA_SOFTWARE_PATH_ID,
3931             &userFeatureData,
3932             m_osInterface->pOsContext);
3933 
3934         if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0)
3935         {
3936             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnLoadLibrary(m_osInterface, path_buffer, &m_swLaMode));
3937         }
3938     }
3939 
3940     // SW LA DLL Reporting
3941     CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_LA_SOFTWARE_IN_USE_ID, (m_swLaMode == nullptr) ? false : true, m_osInterface->pOsContext);
3942 
3943     return eStatus;
3944 }
3945 
CodechalVdencHevcState(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)3946 CodechalVdencHevcState::CodechalVdencHevcState(
3947     CodechalHwInterface* hwInterface,
3948     CodechalDebugInterface* debugInterface,
3949     PCODECHAL_STANDARD_INFO standardInfo)
3950     :CodechalEncodeHevcBase(hwInterface, debugInterface, standardInfo)
3951 {
3952     m_fieldScalingOutputInterleaved = false;
3953     m_2xMeSupported = false;
3954     m_combinedDownScaleAndDepthConversion = false;
3955     m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
3956     m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
3957     m_vdencLaInitDmemBufferSize = sizeof(CodechalVdencHevcLaDmem);
3958     m_vdencLaUpdateDmemBufferSize = sizeof(CodechalVdencHevcLaDmem);
3959 
3960     MOS_ZeroMemory(&m_sliceCountBuffer, sizeof(m_sliceCountBuffer));
3961     MOS_ZeroMemory(&m_vdencModeTimerBuffer, sizeof(m_vdencModeTimerBuffer));
3962 
3963     MOS_ZeroMemory(&m_vdencBrcBuffers, sizeof(m_vdencBrcBuffers));
3964     MOS_ZeroMemory(&m_dataFromPicsBuffer, sizeof(m_dataFromPicsBuffer));
3965     MOS_ZeroMemory(&m_vdencDeltaQpBuffer, sizeof(m_vdencDeltaQpBuffer));
3966     MOS_ZeroMemory(&m_vdencOutputROIStreaminBuffer, sizeof(m_vdencOutputROIStreaminBuffer));
3967     MOS_ZeroMemory(m_vdencBrcUpdateDmemBuffer, sizeof(m_vdencBrcUpdateDmemBuffer));
3968     MOS_ZeroMemory(&m_vdencBrcInitDmemBuffer, sizeof(m_vdencBrcInitDmemBuffer));
3969     MOS_ZeroMemory(&m_vdencBrcConstDataBuffer, sizeof(m_vdencBrcConstDataBuffer));
3970     MOS_ZeroMemory(&m_vdencBrcHistoryBuffer, sizeof(m_vdencBrcHistoryBuffer));
3971     MOS_ZeroMemory(&m_vdencReadBatchBuffer, sizeof(m_vdencReadBatchBuffer));
3972     MOS_ZeroMemory(&m_vdencReadBatchBuffer, sizeof(m_vdencGroup3BatchBuffer));
3973     MOS_ZeroMemory(&m_vdencBrcDbgBuffer, sizeof(m_vdencBrcDbgBuffer));
3974     MOS_ZeroMemory(&m_vdenc2ndLevelBatchBuffer, sizeof(m_vdenc2ndLevelBatchBuffer));
3975     MOS_ZeroMemory(m_resSliceReport, sizeof(m_resSliceReport));
3976     MOS_ZeroMemory(&m_vdencLaStatsBuffer, sizeof(m_vdencLaStatsBuffer));
3977     MOS_ZeroMemory(&m_vdencLaDataBuffer, sizeof(m_vdencLaDataBuffer));
3978 
3979 }
3980 
StoreHucErrorStatus(MmioRegistersHuc * mmioRegisters,PMOS_COMMAND_BUFFER cmdBuffer,bool addToEncodeStatus)3981 MOS_STATUS CodechalVdencHevcState::StoreHucErrorStatus(MmioRegistersHuc* mmioRegisters, PMOS_COMMAND_BUFFER cmdBuffer, bool addToEncodeStatus)
3982 {
3983     // Write Huc Error Flag mask: DW1 (mask value)
3984     MHW_MI_STORE_DATA_PARAMS storeDataParams;
3985     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
3986     storeDataParams.pOsResource = &m_resHucErrorStatusBuffer;
3987     storeDataParams.dwResourceOffset = sizeof(uint32_t);
3988     storeDataParams.dwValue = CODECHAL_VDENC_HEVC_BRC_HUC_STATUS_MEMORY_ACCESS_ERROR_MASK;
3989     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
3990 
3991     // store HUC_STATUS register: DW0 (actual value)
3992     MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
3993     MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
3994     storeRegParams.presStoreBuffer = &m_resHucErrorStatusBuffer;
3995     storeRegParams.dwOffset = 0;
3996     storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
3997     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams));
3998 
3999     if(addToEncodeStatus)
4000     {
4001         EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf;
4002 
4003         uint32_t baseOffset =
4004             (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // pEncodeStatus is offset by 2 DWs in the resource
4005 
4006         // store HUC_STATUS register
4007         MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
4008         MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
4009         storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer;
4010         storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset;
4011         storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset;
4012         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(
4013             cmdBuffer,
4014             &storeRegParams));
4015     }
4016 
4017     return MOS_STATUS_SUCCESS;
4018 }
4019 
PrepareHWMetaData(PMOS_COMMAND_BUFFER cmdBuffer)4020 MOS_STATUS CodechalVdencHevcState::PrepareHWMetaData(PMOS_COMMAND_BUFFER cmdBuffer)
4021 {
4022     CODECHAL_ENCODE_FUNCTION_ENTER;
4023     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4024 
4025     if (!m_presMetadataBuffer)
4026     {
4027         return MOS_STATUS_SUCCESS;
4028     }
4029 
4030     // Intra/Inter/Skip CU Cnt /SubregionSliceSizeCalc
4031     auto xCalAtomic = [&](PMOS_RESOURCE presDst, uint32_t dstOffset, PMOS_RESOURCE presSrc, uint32_t srcOffset, MHW_COMMON_MI_ATOMIC_OPCODE opCode) {
4032         auto                            mmioRegistersMfx = m_mfxInterface->GetMmioRegisters(m_vdboxIndex);
4033         MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams;
4034         MHW_MI_FLUSH_DW_PARAMS          flushDwParams;
4035         MHW_MI_ATOMIC_PARAMS            atomicParams;
4036 
4037         MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams));
4038         MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
4039         MOS_ZeroMemory(&atomicParams, sizeof(atomicParams));
4040 
4041         miLoadRegMemParams.presStoreBuffer = presSrc;
4042         miLoadRegMemParams.dwOffset        = srcOffset;
4043         miLoadRegMemParams.dwRegister      = mmioRegistersMfx->generalPurposeRegister0LoOffset;
4044         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(cmdBuffer, &miLoadRegMemParams));
4045 
4046         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
4047 
4048         atomicParams.pOsResource      = presDst;
4049         atomicParams.dwResourceOffset = dstOffset;
4050         atomicParams.dwDataSize       = sizeof(uint32_t);
4051         atomicParams.Operation        = opCode;
4052         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(cmdBuffer, &atomicParams));
4053 
4054         return MOS_STATUS_SUCCESS;
4055     };
4056 
4057     MHW_MI_STORE_DATA_PARAMS storeDataParams;
4058     MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
4059     storeDataParams.pOsResource         = m_presMetadataBuffer;
4060     storeDataParams.dwResourceOffset    = m_metaDataOffset.dwEncodeErrorFlags;
4061     storeDataParams.dwValue             = 0;  // No error
4062     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4063 
4064     storeDataParams.dwResourceOffset    = m_metaDataOffset.dwWrittenSubregionsCount;
4065     storeDataParams.dwValue             = m_numSlices;
4066     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4067 
4068     MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams;
4069     MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams));
4070     for (uint16_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4071     {
4072         uint32_t subRegionSartOffset = m_metaDataOffset.dwMetaDataSize + slcCount*m_metaDataOffset.dwMetaDataSubRegionSize;
4073 
4074         storeDataParams.dwResourceOffset    = subRegionSartOffset + m_metaDataOffset.dwbStartOffset;
4075         storeDataParams.dwValue             = 0;
4076         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4077 
4078         storeDataParams.dwResourceOffset    = subRegionSartOffset + m_metaDataOffset.dwbHeaderSize;
4079         storeDataParams.dwValue             = m_slcData[slcCount].BitSize;
4080         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4081 
4082         miCpyMemMemParams.presSrc           = &m_resLcuBaseAddressBuffer;
4083         miCpyMemMemParams.presDst           = m_presMetadataBuffer;
4084         miCpyMemMemParams.dwSrcOffset       = slcCount * 16 * sizeof(uint32_t); //slice size offset in resLcuBaseAddressBuffer is 16DW
4085         miCpyMemMemParams.dwDstOffset       = subRegionSartOffset + m_metaDataOffset.dwbSize;
4086         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4087         if (slcCount)
4088         {
4089             CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, subRegionSartOffset + m_metaDataOffset.dwbSize,
4090                 &m_resLcuBaseAddressBuffer, (slcCount - 1) * 16 * sizeof(uint32_t), MHW_MI_ATOMIC_SUB));
4091         }
4092     }
4093     auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
4094     CODECHAL_ENCODE_CHK_NULL_RETURN(mmioRegisters);
4095     MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams;
4096     MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams));
4097     miStoreRegMemParams.presStoreBuffer = m_presMetadataBuffer;
4098     miStoreRegMemParams.dwOffset = m_metaDataOffset.dwEncodedBitstreamWrittenBytesCount;
4099     miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
4100     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &miStoreRegMemParams));
4101 
4102     // Statistics
4103     // Average QP
4104     if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
4105     {
4106         storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageQP;
4107         storeDataParams.dwValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
4108         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4109     }
4110     else
4111     {
4112         miCpyMemMemParams.presSrc           = &m_vdenc2ndLevelBatchBuffer[m_currRecycledBufIdx].OsResource;
4113         miCpyMemMemParams.dwSrcOffset       = 0x6F * sizeof(uint32_t);
4114         miCpyMemMemParams.presDst           = m_presMetadataBuffer;
4115         miCpyMemMemParams.dwDstOffset       = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageQP;
4116         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4117 
4118         MHW_MI_ATOMIC_PARAMS atomicParams;
4119         MOS_ZeroMemory((&atomicParams), sizeof(atomicParams));
4120         atomicParams.pOsResource            = m_presMetadataBuffer;
4121         atomicParams.dwResourceOffset       = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageQP;
4122         atomicParams.dwDataSize             = sizeof(uint32_t);
4123         atomicParams.Operation              = MHW_MI_ATOMIC_AND;
4124         atomicParams.bInlineData            = true;
4125         atomicParams.dwOperand1Data[0]      = 0xFF;
4126         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiAtomicCmd(cmdBuffer, &atomicParams));
4127     }
4128 
4129     // LCUSkipIn8x8Unit
4130     miCpyMemMemParams.presSrc           = &m_resFrameStatStreamOutBuffer;
4131     miCpyMemMemParams.dwSrcOffset       = 7 * sizeof(uint32_t);
4132     miCpyMemMemParams.presDst           = m_presMetadataBuffer;
4133     miCpyMemMemParams.dwDstOffset       = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount;
4134     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4135     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount,
4136         &m_resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4137     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount,
4138         &m_resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4139     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount,
4140         &m_resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4141 
4142     // NumCU_IntraDC, NumCU_IntraPlanar, NumCU_IntraAngular
4143     miCpyMemMemParams.presSrc           = &m_resFrameStatStreamOutBuffer;
4144     miCpyMemMemParams.dwSrcOffset       = 20 * sizeof(uint32_t);
4145     miCpyMemMemParams.dwDstOffset       = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwIntraCodingUnitsCount;
4146     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4147     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwIntraCodingUnitsCount,
4148         &m_resFrameStatStreamOutBuffer, 21 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4149     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwIntraCodingUnitsCount,
4150         &m_resFrameStatStreamOutBuffer, 22 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4151 
4152     //NumCU_Merge (LCUSkipIn8x8Unit), NumCU_MVdirL0, NumCU_MVdirL1, NumCU_MVdirBi
4153     miCpyMemMemParams.presSrc           = &m_resFrameStatStreamOutBuffer;
4154     miCpyMemMemParams.dwSrcOffset       = 27 * sizeof(uint32_t);
4155     miCpyMemMemParams.dwDstOffset       = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount;
4156     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer,&miCpyMemMemParams));
4157     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4158         &m_resFrameStatStreamOutBuffer, 28 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4159     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4160         &m_resFrameStatStreamOutBuffer, 29 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4161     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4162         &m_resFrameStatStreamOutBuffer, 30 * sizeof(uint32_t), MHW_MI_ATOMIC_ADD));
4163     CODECHAL_ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwInterCodingUnitsCount,
4164         m_presMetadataBuffer, m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwSkipCodingUnitsCount, MHW_MI_ATOMIC_SUB));
4165 
4166     // Average MV_X/MV_Y, report (0,0) as temp solution, later may need kernel involved
4167     storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageMotionEstimationXDirection;
4168     storeDataParams.dwValue          = 0;
4169     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4170 
4171     storeDataParams.dwResourceOffset = m_metaDataOffset.dwEncodeStats + m_metaDataOffset.dwAverageMotionEstimationYDirection;
4172     storeDataParams.dwValue          = 0;
4173     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams));
4174 
4175     return eStatus;
4176 }
4177 
SetupForceIntraStreamIn(PMOS_RESOURCE streamIn)4178 MOS_STATUS CodechalVdencHevcState::SetupForceIntraStreamIn(PMOS_RESOURCE streamIn)
4179 {
4180     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4181 
4182     CODECHAL_ENCODE_FUNCTION_ENTER;
4183 
4184     CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
4185 
4186     MOS_LOCK_PARAMS lockFlags;
4187     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
4188     lockFlags.WriteOnly = true;
4189 
4190     uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
4191         m_osInterface,
4192         streamIn,
4193         &lockFlags);
4194     CODECHAL_ENCODE_CHK_NULL_RETURN(data);
4195 
4196     MHW_VDBOX_VDENC_STREAMIN_STATE_PARAMS streaminDataParams;
4197     uint32_t streamInWidth  = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32);
4198     uint32_t streamInHeight = (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32);
4199 
4200     if (m_lookaheadPass)
4201     {
4202         // lookahead pass should lower QP by 2 to encode force intra frame.
4203         MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
4204         streaminDataParams.setQpRoiCtrl = true;
4205         streaminDataParams.forceQp[0] = m_hevcPicParams->QpY - 2;
4206         streaminDataParams.forceQp[1] = m_hevcPicParams->QpY - 2;
4207         streaminDataParams.forceQp[2] = m_hevcPicParams->QpY - 2;
4208         streaminDataParams.forceQp[3] = m_hevcPicParams->QpY - 2;
4209         SetStreaminDataPerRegion(streamInWidth, 0, streamInHeight, 0, streamInWidth, &streaminDataParams, data);
4210     }
4211 
4212     MOS_ZeroMemory(&streaminDataParams, sizeof(streaminDataParams));
4213     streaminDataParams.puTypeCtrl = 1;  //force intra
4214     streaminDataParams.maxTuSize = 3;
4215     streaminDataParams.maxCuSize = 3;
4216     switch (m_hevcSeqParams->TargetUsage)
4217     {
4218     case 1:
4219     case 4:
4220         streaminDataParams.numMergeCandidateCu64x64 = 4;
4221         streaminDataParams.numMergeCandidateCu32x32 = 3;
4222         streaminDataParams.numMergeCandidateCu16x16 = 2;
4223         streaminDataParams.numMergeCandidateCu8x8   = 1;
4224         streaminDataParams.numImePredictors         = m_imgStateImePredictors;
4225         break;
4226     case 7:
4227         streaminDataParams.numMergeCandidateCu64x64 = 2;
4228         streaminDataParams.numMergeCandidateCu32x32 = 2;
4229         streaminDataParams.numMergeCandidateCu16x16 = 2;
4230         streaminDataParams.numMergeCandidateCu8x8   = 0;
4231         streaminDataParams.numImePredictors         = 4;
4232         break;
4233     }
4234 
4235     uint32_t streamInNumCUs = streamInWidth * streamInHeight;
4236     for (uint32_t i = 0; i < streamInNumCUs; i++)
4237     {
4238         SetStreaminDataPerLcu(&streaminDataParams, data + (i * 64));
4239     }
4240 
4241     m_osInterface->pfnUnlockResource(
4242         m_osInterface,
4243         streamIn);
4244 
4245     return eStatus;
4246 }
4247 
4248 #if USE_CODECHAL_DEBUG_TOOL
DumpHucBrcInit()4249 MOS_STATUS CodechalVdencHevcState::DumpHucBrcInit()
4250 {
4251     CODECHAL_ENCODE_FUNCTION_ENTER;
4252     int32_t currentPass = GetCurrentPass();
4253     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
4254         &m_vdencBrcInitDmemBuffer[m_currRecycledBufIdx],
4255         m_vdencBrcInitDmemBufferSize,
4256         currentPass,
4257         hucRegionDumpInit));
4258 
4259     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4260         &m_vdencBrcHistoryBuffer,
4261         0,
4262         CODECHAL_VDENC_HEVC_BRC_HISTORY_BUF_SIZE,
4263         0,
4264         "_History",
4265         true,
4266         currentPass,
4267         hucRegionDumpInit));
4268     return MOS_STATUS_SUCCESS;
4269 }
4270 
DumpHucBrcUpdate(bool isInput)4271 MOS_STATUS CodechalVdencHevcState::DumpHucBrcUpdate(bool isInput)
4272 {
4273     CODECHAL_ENCODE_FUNCTION_ENTER;
4274     int32_t currentPass = GetCurrentPass();
4275     if (isInput)
4276     {
4277         //Dump HucBrcUpdate input buffers
4278         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
4279             &m_vdencBrcUpdateDmemBuffer[m_currRecycledBufIdx][currentPass],
4280             m_vdencBrcUpdateDmemBufferSize,
4281             currentPass,
4282             hucRegionDumpUpdate));
4283 
4284         // Region 1 - VDENC Statistics Buffer dump
4285         auto vdencStatusBuffer = m_virtualAddrParams.regionParams[1].presRegion;
4286         auto vdencStatusOffset = m_virtualAddrParams.regionParams[1].dwOffset;
4287         if (vdencStatusBuffer)
4288         {
4289             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4290                 vdencStatusBuffer,
4291                 vdencStatusOffset,
4292                 m_vdencBrcStatsBufferSize,
4293                 1,
4294                 "_VdencStats",
4295                 true,
4296                 currentPass,
4297                 hucRegionDumpUpdate));
4298         }
4299 
4300         // Region 2 - PAK Statistics Buffer dump
4301         auto frameStatStreamOutBuffer = m_virtualAddrParams.regionParams[2].presRegion;
4302         auto frameStatStreamOutOffset = m_virtualAddrParams.regionParams[2].dwOffset;
4303         if (frameStatStreamOutBuffer)
4304         {
4305             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4306                 frameStatStreamOutBuffer,
4307                 frameStatStreamOutOffset,
4308                 m_vdencBrcPakStatsBufferSize,
4309                 2,
4310                 "_PakStats",
4311                 true,
4312                 currentPass,
4313                 hucRegionDumpUpdate));
4314         }
4315 
4316         // Region 3 - Input SLB Buffer
4317         auto vdencReadBatchBuffer = m_virtualAddrParams.regionParams[3].presRegion;
4318         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4319             vdencReadBatchBuffer,
4320             0,
4321             m_hwInterface->m_vdencReadBatchBufferSize,
4322             3,
4323             "_Slb",
4324             true,
4325             currentPass,
4326             hucRegionDumpUpdate));
4327 
4328         // Region 4 - Constant Data Buffer dump
4329         auto vdencBrcConstDataBuffer = m_virtualAddrParams.regionParams[4].presRegion;
4330         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4331             vdencBrcConstDataBuffer,
4332             0,
4333             m_vdencBrcConstDataBufferSize,
4334             4,
4335             "_ConstData",
4336             true,
4337             currentPass,
4338             hucRegionDumpUpdate));
4339 
4340         // Region 7 - Slice Stat Streamout (Input)
4341         auto lucBasedAddressBuffer = m_virtualAddrParams.regionParams[7].presRegion;
4342         auto lucBasedAddressOffset = m_virtualAddrParams.regionParams[7].dwOffset;
4343         if (lucBasedAddressBuffer)
4344         {
4345             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4346                 lucBasedAddressBuffer,
4347                 lucBasedAddressOffset,
4348                 CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE,
4349                 7,
4350                 "_SliceStat",
4351                 true,
4352                 currentPass,
4353                 hucRegionDumpUpdate));
4354         }
4355 
4356         // Region 8 - PAK MMIO Buffer dump
4357         auto pakInfoBufffer = m_virtualAddrParams.regionParams[8].presRegion;
4358         if (pakInfoBufffer)
4359         {
4360             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4361                 pakInfoBufffer,
4362                 0,
4363                 sizeof(CodechalVdencHevcPakInfo),
4364                 8,
4365                 "_PakMmio",
4366                 true,
4367                 currentPass,
4368                 hucRegionDumpUpdate));
4369         }
4370 
4371         // Region 9 - Streamin Buffer for ROI (Input)
4372         auto streamInBufferSize = (MOS_ALIGN_CEIL(m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
4373         auto stramInBuffer = m_virtualAddrParams.regionParams[9].presRegion;
4374         if (stramInBuffer)
4375         {
4376             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4377                 stramInBuffer,
4378                 0,
4379                 streamInBufferSize,
4380                 9,
4381                 "_RoiStreamin",
4382                 true,
4383                 currentPass,
4384                 hucRegionDumpUpdate));
4385         }
4386 
4387         // Region 10 - Delta QP for ROI Buffer
4388         auto vdencDeltaQpBuffer = m_virtualAddrParams.regionParams[10].presRegion;
4389         if (vdencDeltaQpBuffer)
4390         {
4391             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4392                 vdencDeltaQpBuffer,
4393                 0,
4394                 m_deltaQpRoiBufferSize,
4395                 10,
4396                 "_DeltaQp",
4397                 true,
4398                 currentPass,
4399                 hucRegionDumpUpdate));
4400         }
4401 
4402         // Region 12 - Input SLB Buffer
4403         auto slbBuffer = m_virtualAddrParams.regionParams[12].presRegion;
4404         if (slbBuffer)
4405         {
4406             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4407                 slbBuffer,
4408                 0,
4409                 m_hwInterface->m_vdencGroup3BatchBufferSize,
4410                 12,
4411                 "_Slb",
4412                 true,
4413                 currentPass,
4414                 hucRegionDumpUpdate));
4415         }
4416     }
4417     else
4418     {
4419         // Region 5 - Output SLB Buffer
4420         auto vdenc2ndLevelBatchBuffer = m_virtualAddrParams.regionParams[5].presRegion;
4421         if (vdenc2ndLevelBatchBuffer)
4422         {
4423             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4424                 vdenc2ndLevelBatchBuffer,
4425                 0,
4426                 m_hwInterface->m_vdenc2ndLevelBatchBufferSize,
4427                 5,
4428                 "_Slb",
4429                 false,
4430                 currentPass,
4431                 hucRegionDumpUpdate));
4432         }
4433 
4434         // Region 11 - Output ROI Streamin Buffer
4435         auto vdencOutputROIStreaminBuffer = m_virtualAddrParams.regionParams[11].presRegion;
4436         if (vdencOutputROIStreaminBuffer)
4437         {
4438             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4439                 vdencOutputROIStreaminBuffer,
4440                 0,
4441                 m_roiStreamInBufferSize,
4442                 11,
4443                 "_RoiStreamin",
4444                 false,
4445                 currentPass,
4446                 hucRegionDumpUpdate));
4447         }
4448     }
4449 
4450     // Region 0 - History Buffer dump (Input/Output)
4451     auto vdencBrcHistoryBuffer = m_virtualAddrParams.regionParams[0].presRegion;
4452     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4453         vdencBrcHistoryBuffer,
4454         0,
4455         m_brcHistoryBufSize,
4456         0,
4457         "_History",
4458         isInput,
4459         currentPass,
4460         hucRegionDumpUpdate));
4461 
4462     // Region 6 - Data from Pictures for Weighted Prediction (Input/Output)
4463     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4464         &m_dataFromPicsBuffer,
4465         0,
4466         CODECHAL_PAGE_SIZE * 4,
4467         6,
4468         "_PicsData",
4469         isInput,
4470         currentPass,
4471         hucRegionDumpUpdate));
4472 
4473     // Region 15 - Debug Output
4474     auto debugBuffer = m_virtualAddrParams.regionParams[15].presRegion;
4475     if (debugBuffer)
4476     {
4477         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(
4478             debugBuffer,
4479             0,
4480             0x1000,
4481             15,
4482             "_Debug",
4483             isInput,
4484             currentPass,
4485             hucRegionDumpUpdate));
4486     }
4487     return MOS_STATUS_SUCCESS;
4488 }
4489 
DumpVdencOutputs()4490 MOS_STATUS CodechalVdencHevcState::DumpVdencOutputs()
4491 {
4492     CODECHAL_ENCODE_FUNCTION_ENTER;
4493 
4494     // Dump VDENC Stats Buffer
4495     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4496         m_resVdencStatsBuffer,
4497         CodechalDbgAttr::attrVdencOutput,
4498         "_Stats",
4499         m_vdencBrcStatsBufferSize,
4500         0,
4501         CODECHAL_NUM_MEDIA_STATES));
4502 
4503     // Dump PAK Stats Buffer
4504     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4505         m_resPakStatsBuffer,
4506         CodechalDbgAttr::attrVdencOutput,
4507         "_PakStats",
4508         m_vdencBrcPakStatsBufferSize,
4509         0,
4510         CODECHAL_NUM_MEDIA_STATES));
4511 
4512     // Dump PAK MMIO Buffer
4513     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4514         &m_resPakMmioBuffer,
4515         CodechalDbgKernel::kernelBrcUpdate,
4516         m_currPass ? "_MmioReg_Output_Pass1" : "_MmioReg_Output_Pass0",
4517         sizeof(VdencBrcPakMmio),
4518         0,
4519         CODECHAL_NUM_MEDIA_STATES));
4520 
4521     // Dump PAK Obj Cmd Buffer
4522     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4523         m_resVdencPakObjCmdStreamOutBuffer,
4524         CodechalDbgAttr::attrVdencOutput,
4525         "_MbCode",
4526         m_mvOffset,
4527         0,
4528         CODECHAL_NUM_MEDIA_STATES));
4529 
4530     // Dump CU Record Cmd Buffer
4531     CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4532         m_resVdencPakObjCmdStreamOutBuffer,
4533         CodechalDbgAttr::attrVdencOutput,
4534         "_CURecord",
4535         m_mbCodeSize - m_mvOffset,
4536         m_mvOffset,
4537         CODECHAL_NUM_MEDIA_STATES));
4538 
4539     // Slice Size Conformance
4540     if (m_hevcSeqParams->SliceSizeControl)
4541     {
4542         uint32_t dwSize = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6*CODECHAL_CACHELINE_SIZE;
4543         if (!m_hevcPicParams->tiles_enabled_flag)
4544         {
4545             // Slice Size StreamOut Surface
4546             CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4547                 &m_resLcuBaseAddressBuffer,
4548                 CodechalDbgAttr::attrVdencOutput,
4549                 "_SliceSize",
4550                 dwSize,
4551                 0,
4552                 CODECHAL_NUM_MEDIA_STATES));
4553         }
4554 
4555         dwSize = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
4556         // Slice Count buffer 1 DW = 4 Bytes
4557         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4558             m_resSliceCountBuffer,
4559             CodechalDbgAttr::attrVdencOutput,
4560             "_SliceCount",
4561             dwSize,
4562             0,
4563             CODECHAL_NUM_MEDIA_STATES));
4564 
4565         // VDEncMode Timer buffer 1 DW = 4 Bytes
4566         CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4567             m_resVdencModeTimerBuffer,
4568             CodechalDbgAttr::attrVdencOutput,
4569             "_ModeTimer",
4570             dwSize,
4571             0,
4572             CODECHAL_NUM_MEDIA_STATES));
4573     }
4574 
4575     return MOS_STATUS_SUCCESS;
4576 }
4577 
ModifyEncodedFrameSizeWithFakeHeaderSize(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t fakeHeaderSizeInByte,PMOS_RESOURCE resBrcUpdateCurbe,uint32_t targetSizePos,PMOS_RESOURCE resPakStat,uint32_t slcHrdSizePos)4578 MOS_STATUS CodechalVdencHevcState::ModifyEncodedFrameSizeWithFakeHeaderSize(
4579     PMOS_COMMAND_BUFFER                 cmdBuffer,
4580     uint32_t                            fakeHeaderSizeInByte,
4581     PMOS_RESOURCE                       resBrcUpdateCurbe,
4582     uint32_t                            targetSizePos,
4583     PMOS_RESOURCE                       resPakStat,
4584     uint32_t                            slcHrdSizePos
4585 )
4586 {
4587     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
4588 
4589     CODECHAL_ENCODE_FUNCTION_ENTER;
4590 
4591     //calculate slice headers size
4592     PCODEC_ENCODER_SLCDATA slcData = m_slcData;
4593     CODECHAL_ENCODE_CHK_NULL_RETURN(slcData);
4594     uint32_t totalSliceHeaderSize = 0;
4595     for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++)
4596     {
4597         totalSliceHeaderSize += (slcData->BitSize + 7) >> 3;
4598         slcData++;
4599     }
4600 
4601     uint32_t firstHdrSz = 0;
4602     for (uint32_t i = 0; i < m_encodeParams.uiNumNalUnits; i++)
4603     {
4604         firstHdrSz += m_encodeParams.ppNALUnitParams[i]->uiSize;
4605     }
4606 
4607     totalSliceHeaderSize += firstHdrSz;
4608 
4609     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddBufferWithIMMValue(
4610         cmdBuffer,
4611         resBrcUpdateCurbe,
4612         targetSizePos,
4613         fakeHeaderSizeInByte - totalSliceHeaderSize,
4614         true));
4615 
4616     CODECHAL_ENCODE_CHK_STATUS_RETURN(AddBufferWithIMMValue(
4617         cmdBuffer,
4618         resPakStat,
4619         slcHrdSizePos,
4620         fakeHeaderSizeInByte * 8,
4621         true));
4622 
4623     return eStatus;
4624 }
4625 #endif
4626