1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "common_video/h264/sps_parser.h"
12
13 #include <cstdint>
14 #include <vector>
15
16 #include "common_video/h264/h264_common.h"
17 #include "rtc_base/bitstream_reader.h"
18
namespace {
// Inclusive bounds accepted for the `delta_scale` syntax element while
// skipping over scaling lists; values outside this range make the parse fail.
// NOTE: the spelling of `kScaldingDeltaMax` is kept as-is because the parser
// below refers to it by this name.
constexpr int kScalingDeltaMin{-128};
constexpr int kScaldingDeltaMax{127};
}  // namespace
23
24 namespace webrtc {
25
26 SpsParser::SpsState::SpsState() = default;
27 SpsParser::SpsState::SpsState(const SpsState&) = default;
28 SpsParser::SpsState::~SpsState() = default;
29
30 // General note: this is based off the 02/2014 version of the H.264 standard.
31 // You can find it on this page:
32 // http://www.itu.int/rec/T-REC-H.264
33
34 // Unpack RBSP and parse SPS state from the supplied buffer.
ParseSps(const uint8_t * data,size_t length)35 absl::optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data,
36 size_t length) {
37 std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
38 BitstreamReader reader(unpacked_buffer);
39 return ParseSpsUpToVui(reader);
40 }
41
ParseSpsUpToVui(BitstreamReader & reader)42 absl::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
43 BitstreamReader& reader) {
44 // Now, we need to use a bitstream reader to parse through the actual AVC SPS
45 // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
46 // H.264 standard for a complete description.
47 // Since we only care about resolution, we ignore the majority of fields, but
48 // we still have to actively parse through a lot of the data, since many of
49 // the fields have variable size.
50 // We're particularly interested in:
51 // chroma_format_idc -> affects crop units
52 // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
53 // frame_crop_*_offset -> crop information
54
55 SpsState sps;
56
57 // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
58 // 0. It defaults to 1, when not specified.
59 uint32_t chroma_format_idc = 1;
60
61 // profile_idc: u(8). We need it to determine if we need to read/skip chroma
62 // formats.
63 uint8_t profile_idc = reader.Read<uint8_t>();
64 // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
65 // 1 bit each for the flags + 2 bits + 8 bits for level_idc = 16 bits.
66 reader.ConsumeBits(16);
67 // seq_parameter_set_id: ue(v)
68 sps.id = reader.ReadExponentialGolomb();
69 sps.separate_colour_plane_flag = 0;
70 // See if profile_idc has chroma format information.
71 if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
72 profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
73 profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
74 profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
75 // chroma_format_idc: ue(v)
76 chroma_format_idc = reader.ReadExponentialGolomb();
77 if (chroma_format_idc == 3) {
78 // separate_colour_plane_flag: u(1)
79 sps.separate_colour_plane_flag = reader.ReadBit();
80 }
81 // bit_depth_luma_minus8: ue(v)
82 reader.ReadExponentialGolomb();
83 // bit_depth_chroma_minus8: ue(v)
84 reader.ReadExponentialGolomb();
85 // qpprime_y_zero_transform_bypass_flag: u(1)
86 reader.ConsumeBits(1);
87 // seq_scaling_matrix_present_flag: u(1)
88 if (reader.Read<bool>()) {
89 // Process the scaling lists just enough to be able to properly
90 // skip over them, so we can still read the resolution on streams
91 // where this is included.
92 int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8);
93 for (int i = 0; i < scaling_list_count; ++i) {
94 // seq_scaling_list_present_flag[i] : u(1)
95 if (reader.Read<bool>()) {
96 int last_scale = 8;
97 int next_scale = 8;
98 int size_of_scaling_list = i < 6 ? 16 : 64;
99 for (int j = 0; j < size_of_scaling_list; j++) {
100 if (next_scale != 0) {
101 // delta_scale: se(v)
102 int delta_scale = reader.ReadSignedExponentialGolomb();
103 if (!reader.Ok() || delta_scale < kScalingDeltaMin ||
104 delta_scale > kScaldingDeltaMax) {
105 return absl::nullopt;
106 }
107 next_scale = (last_scale + delta_scale + 256) % 256;
108 }
109 if (next_scale != 0)
110 last_scale = next_scale;
111 }
112 }
113 }
114 }
115 }
116 // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with
117 // BitstreamReader::ReadBits, which can read at most 64 bits at a time. We
118 // also have to avoid overflow when adding 4 to the on-wire golomb value,
119 // e.g., for evil input data, ReadExponentialGolomb might return 0xfffc.
120 const uint32_t kMaxLog2Minus4 = 32 - 4;
121
122 // log2_max_frame_num_minus4: ue(v)
123 uint32_t log2_max_frame_num_minus4 = reader.ReadExponentialGolomb();
124 if (!reader.Ok() || log2_max_frame_num_minus4 > kMaxLog2Minus4) {
125 return absl::nullopt;
126 }
127 sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
128
129 // pic_order_cnt_type: ue(v)
130 sps.pic_order_cnt_type = reader.ReadExponentialGolomb();
131 if (sps.pic_order_cnt_type == 0) {
132 // log2_max_pic_order_cnt_lsb_minus4: ue(v)
133 uint32_t log2_max_pic_order_cnt_lsb_minus4 = reader.ReadExponentialGolomb();
134 if (!reader.Ok() || log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) {
135 return absl::nullopt;
136 }
137 sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4;
138 } else if (sps.pic_order_cnt_type == 1) {
139 // delta_pic_order_always_zero_flag: u(1)
140 sps.delta_pic_order_always_zero_flag = reader.ReadBit();
141 // offset_for_non_ref_pic: se(v)
142 reader.ReadExponentialGolomb();
143 // offset_for_top_to_bottom_field: se(v)
144 reader.ReadExponentialGolomb();
145 // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
146 uint32_t num_ref_frames_in_pic_order_cnt_cycle =
147 reader.ReadExponentialGolomb();
148 for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
149 // offset_for_ref_frame[i]: se(v)
150 reader.ReadExponentialGolomb();
151 if (!reader.Ok()) {
152 return absl::nullopt;
153 }
154 }
155 }
156 // max_num_ref_frames: ue(v)
157 sps.max_num_ref_frames = reader.ReadExponentialGolomb();
158 // gaps_in_frame_num_value_allowed_flag: u(1)
159 reader.ConsumeBits(1);
160 //
161 // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
162 // width/height in macroblocks (16x16), which gives us the base resolution,
163 // and then we continue on until we hit the frame crop offsets, which are used
164 // to signify resolutions that aren't multiples of 16.
165 //
166 // pic_width_in_mbs_minus1: ue(v)
167 sps.width = 16 * (reader.ReadExponentialGolomb() + 1);
168 // pic_height_in_map_units_minus1: ue(v)
169 uint32_t pic_height_in_map_units_minus1 = reader.ReadExponentialGolomb();
170 // frame_mbs_only_flag: u(1)
171 sps.frame_mbs_only_flag = reader.ReadBit();
172 if (!sps.frame_mbs_only_flag) {
173 // mb_adaptive_frame_field_flag: u(1)
174 reader.ConsumeBits(1);
175 }
176 sps.height =
177 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
178 // direct_8x8_inference_flag: u(1)
179 reader.ConsumeBits(1);
180 //
181 // MORE IMPORTANT ONES! Now we're at the frame crop information.
182 //
183 uint32_t frame_crop_left_offset = 0;
184 uint32_t frame_crop_right_offset = 0;
185 uint32_t frame_crop_top_offset = 0;
186 uint32_t frame_crop_bottom_offset = 0;
187 // frame_cropping_flag: u(1)
188 if (reader.Read<bool>()) {
189 // frame_crop_{left, right, top, bottom}_offset: ue(v)
190 frame_crop_left_offset = reader.ReadExponentialGolomb();
191 frame_crop_right_offset = reader.ReadExponentialGolomb();
192 frame_crop_top_offset = reader.ReadExponentialGolomb();
193 frame_crop_bottom_offset = reader.ReadExponentialGolomb();
194 }
195 // vui_parameters_present_flag: u(1)
196 sps.vui_params_present = reader.ReadBit();
197
198 // Far enough! We don't use the rest of the SPS.
199 if (!reader.Ok()) {
200 return absl::nullopt;
201 }
202
203 // Figure out the crop units in pixels. That's based on the chroma format's
204 // sampling, which is indicated by chroma_format_idc.
205 if (sps.separate_colour_plane_flag || chroma_format_idc == 0) {
206 frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag);
207 frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag);
208 } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) {
209 // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
210 if (chroma_format_idc == 1 || chroma_format_idc == 2) {
211 frame_crop_left_offset *= 2;
212 frame_crop_right_offset *= 2;
213 }
214 // Height multipliers for format 1 (4:2:0).
215 if (chroma_format_idc == 1) {
216 frame_crop_top_offset *= 2;
217 frame_crop_bottom_offset *= 2;
218 }
219 }
220 // Subtract the crop for each dimension.
221 sps.width -= (frame_crop_left_offset + frame_crop_right_offset);
222 sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset);
223
224 return sps;
225 }
226
227 } // namespace webrtc
228