1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "common_video/h264/h264_bitstream_parser.h"
11
12 #include <stdlib.h>
13
14 #include <cstdint>
15 #include <vector>
16
17 #include "common_video/h264/h264_common.h"
18 #include "rtc_base/bitstream_reader.h"
19 #include "rtc_base/logging.h"
20
21 namespace webrtc {
namespace {

// Inclusive bounds that GetLastSliceQp() enforces on the reconstructed slice
// QP (26 + pic_init_qp_minus26 + slice_qp_delta).
constexpr int kMinQpValue{0};
constexpr int kMaxQpValue{51};

// Largest magnitude accepted for a parsed slice_qp_delta. Anything beyond it
// is treated by ParseNonParameterSetNalu() as a sign of a corrupt slice
// header.
constexpr int kMaxAbsQpDeltaValue{51};

}  // namespace
29
30 H264BitstreamParser::H264BitstreamParser() = default;
31 H264BitstreamParser::~H264BitstreamParser() = default;
32
ParseNonParameterSetNalu(const uint8_t * source,size_t source_length,uint8_t nalu_type)33 H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu(
34 const uint8_t* source,
35 size_t source_length,
36 uint8_t nalu_type) {
37 if (!sps_ || !pps_)
38 return kInvalidStream;
39
40 last_slice_qp_delta_ = absl::nullopt;
41 const std::vector<uint8_t> slice_rbsp =
42 H264::ParseRbsp(source, source_length);
43 if (slice_rbsp.size() < H264::kNaluTypeSize)
44 return kInvalidStream;
45
46 BitstreamReader slice_reader(slice_rbsp);
47 slice_reader.ConsumeBits(H264::kNaluTypeSize * 8);
48
49 // Check to see if this is an IDR slice, which has an extra field to parse
50 // out.
51 bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr;
52 uint8_t nal_ref_idc = (source[0] & 0x60) >> 5;
53
54 // first_mb_in_slice: ue(v)
55 slice_reader.ReadExponentialGolomb();
56 // slice_type: ue(v)
57 uint32_t slice_type = slice_reader.ReadExponentialGolomb();
58 // slice_type's 5..9 range is used to indicate that all slices of a picture
59 // have the same value of slice_type % 5, we don't care about that, so we map
60 // to the corresponding 0..4 range.
61 slice_type %= 5;
62 // pic_parameter_set_id: ue(v)
63 slice_reader.ReadExponentialGolomb();
64 if (sps_->separate_colour_plane_flag == 1) {
65 // colour_plane_id
66 slice_reader.ConsumeBits(2);
67 }
68 // frame_num: u(v)
69 // Represented by log2_max_frame_num bits.
70 slice_reader.ConsumeBits(sps_->log2_max_frame_num);
71 bool field_pic_flag = false;
72 if (sps_->frame_mbs_only_flag == 0) {
73 // field_pic_flag: u(1)
74 field_pic_flag = slice_reader.Read<bool>();
75 if (field_pic_flag) {
76 // bottom_field_flag: u(1)
77 slice_reader.ConsumeBits(1);
78 }
79 }
80 if (is_idr) {
81 // idr_pic_id: ue(v)
82 slice_reader.ReadExponentialGolomb();
83 }
84 // pic_order_cnt_lsb: u(v)
85 // Represented by sps_.log2_max_pic_order_cnt_lsb bits.
86 if (sps_->pic_order_cnt_type == 0) {
87 slice_reader.ConsumeBits(sps_->log2_max_pic_order_cnt_lsb);
88 if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
89 // delta_pic_order_cnt_bottom: se(v)
90 slice_reader.ReadExponentialGolomb();
91 }
92 }
93 if (sps_->pic_order_cnt_type == 1 &&
94 !sps_->delta_pic_order_always_zero_flag) {
95 // delta_pic_order_cnt[0]: se(v)
96 slice_reader.ReadExponentialGolomb();
97 if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
98 // delta_pic_order_cnt[1]: se(v)
99 slice_reader.ReadExponentialGolomb();
100 }
101 }
102 if (pps_->redundant_pic_cnt_present_flag) {
103 // redundant_pic_cnt: ue(v)
104 slice_reader.ReadExponentialGolomb();
105 }
106 if (slice_type == H264::SliceType::kB) {
107 // direct_spatial_mv_pred_flag: u(1)
108 slice_reader.ConsumeBits(1);
109 }
110 switch (slice_type) {
111 case H264::SliceType::kP:
112 case H264::SliceType::kB:
113 case H264::SliceType::kSp:
114 // num_ref_idx_active_override_flag: u(1)
115 if (slice_reader.Read<bool>()) {
116 // num_ref_idx_l0_active_minus1: ue(v)
117 slice_reader.ReadExponentialGolomb();
118 if (slice_type == H264::SliceType::kB) {
119 // num_ref_idx_l1_active_minus1: ue(v)
120 slice_reader.ReadExponentialGolomb();
121 }
122 }
123 break;
124 default:
125 break;
126 }
127 if (!slice_reader.Ok()) {
128 return kInvalidStream;
129 }
130 // assume nal_unit_type != 20 && nal_unit_type != 21:
131 if (nalu_type == 20 || nalu_type == 21) {
132 RTC_LOG(LS_ERROR) << "Unsupported nal unit type.";
133 return kUnsupportedStream;
134 }
135 // if (nal_unit_type == 20 || nal_unit_type == 21)
136 // ref_pic_list_mvc_modification()
137 // else
138 {
139 // ref_pic_list_modification():
140 // `slice_type` checks here don't use named constants as they aren't named
141 // in the spec for this segment. Keeping them consistent makes it easier to
142 // verify that they are both the same.
143 if (slice_type % 5 != 2 && slice_type % 5 != 4) {
144 // ref_pic_list_modification_flag_l0: u(1)
145 if (slice_reader.Read<bool>()) {
146 uint32_t modification_of_pic_nums_idc;
147 do {
148 // modification_of_pic_nums_idc: ue(v)
149 modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb();
150 if (modification_of_pic_nums_idc == 0 ||
151 modification_of_pic_nums_idc == 1) {
152 // abs_diff_pic_num_minus1: ue(v)
153 slice_reader.ReadExponentialGolomb();
154 } else if (modification_of_pic_nums_idc == 2) {
155 // long_term_pic_num: ue(v)
156 slice_reader.ReadExponentialGolomb();
157 }
158 } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok());
159 }
160 }
161 if (slice_type % 5 == 1) {
162 // ref_pic_list_modification_flag_l1: u(1)
163 if (slice_reader.Read<bool>()) {
164 uint32_t modification_of_pic_nums_idc;
165 do {
166 // modification_of_pic_nums_idc: ue(v)
167 modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb();
168 if (modification_of_pic_nums_idc == 0 ||
169 modification_of_pic_nums_idc == 1) {
170 // abs_diff_pic_num_minus1: ue(v)
171 slice_reader.ReadExponentialGolomb();
172 } else if (modification_of_pic_nums_idc == 2) {
173 // long_term_pic_num: ue(v)
174 slice_reader.ReadExponentialGolomb();
175 }
176 } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok());
177 }
178 }
179 }
180 if (!slice_reader.Ok()) {
181 return kInvalidStream;
182 }
183 // TODO(pbos): Do we need support for pred_weight_table()?
184 if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP ||
185 slice_type == H264::SliceType::kSp)) ||
186 (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) {
187 RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported.";
188 return kUnsupportedStream;
189 }
190 // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) ||
191 // (weighted_bipred_idc == 1 && slice_type == B)) {
192 // pred_weight_table()
193 // }
194 if (nal_ref_idc != 0) {
195 // dec_ref_pic_marking():
196 if (is_idr) {
197 // no_output_of_prior_pics_flag: u(1)
198 // long_term_reference_flag: u(1)
199 slice_reader.ConsumeBits(2);
200 } else {
201 // adaptive_ref_pic_marking_mode_flag: u(1)
202 if (slice_reader.Read<bool>()) {
203 uint32_t memory_management_control_operation;
204 do {
205 // memory_management_control_operation: ue(v)
206 memory_management_control_operation =
207 slice_reader.ReadExponentialGolomb();
208 if (memory_management_control_operation == 1 ||
209 memory_management_control_operation == 3) {
210 // difference_of_pic_nums_minus1: ue(v)
211 slice_reader.ReadExponentialGolomb();
212 }
213 if (memory_management_control_operation == 2) {
214 // long_term_pic_num: ue(v)
215 slice_reader.ReadExponentialGolomb();
216 }
217 if (memory_management_control_operation == 3 ||
218 memory_management_control_operation == 6) {
219 // long_term_frame_idx: ue(v)
220 slice_reader.ReadExponentialGolomb();
221 }
222 if (memory_management_control_operation == 4) {
223 // max_long_term_frame_idx_plus1: ue(v)
224 slice_reader.ReadExponentialGolomb();
225 }
226 } while (memory_management_control_operation != 0 && slice_reader.Ok());
227 }
228 }
229 }
230 if (pps_->entropy_coding_mode_flag && slice_type != H264::SliceType::kI &&
231 slice_type != H264::SliceType::kSi) {
232 // cabac_init_idc: ue(v)
233 slice_reader.ReadExponentialGolomb();
234 }
235
236 int last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb();
237 if (!slice_reader.Ok()) {
238 return kInvalidStream;
239 }
240 if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) {
241 // Something has gone wrong, and the parsed value is invalid.
242 RTC_LOG(LS_WARNING) << "Parsed QP value out of range.";
243 return kInvalidStream;
244 }
245
246 last_slice_qp_delta_ = last_slice_qp_delta;
247 return kOk;
248 }
249
ParseSlice(const uint8_t * slice,size_t length)250 void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
251 H264::NaluType nalu_type = H264::ParseNaluType(slice[0]);
252 switch (nalu_type) {
253 case H264::NaluType::kSps: {
254 sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize,
255 length - H264::kNaluTypeSize);
256 if (!sps_)
257 RTC_DLOG(LS_WARNING) << "Unable to parse SPS from H264 bitstream.";
258 break;
259 }
260 case H264::NaluType::kPps: {
261 pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize,
262 length - H264::kNaluTypeSize);
263 if (!pps_)
264 RTC_DLOG(LS_WARNING) << "Unable to parse PPS from H264 bitstream.";
265 break;
266 }
267 case H264::NaluType::kAud:
268 case H264::NaluType::kSei:
269 case H264::NaluType::kPrefix:
270 break; // Ignore these nalus, as we don't care about their contents.
271 default:
272 Result res = ParseNonParameterSetNalu(slice, length, nalu_type);
273 if (res != kOk)
274 RTC_DLOG(LS_INFO) << "Failed to parse bitstream. Error: " << res;
275 break;
276 }
277 }
278
ParseBitstream(rtc::ArrayView<const uint8_t> bitstream)279 void H264BitstreamParser::ParseBitstream(
280 rtc::ArrayView<const uint8_t> bitstream) {
281 std::vector<H264::NaluIndex> nalu_indices =
282 H264::FindNaluIndices(bitstream.data(), bitstream.size());
283 for (const H264::NaluIndex& index : nalu_indices)
284 ParseSlice(bitstream.data() + index.payload_start_offset,
285 index.payload_size);
286 }
287
GetLastSliceQp() const288 absl::optional<int> H264BitstreamParser::GetLastSliceQp() const {
289 if (!last_slice_qp_delta_ || !pps_)
290 return absl::nullopt;
291 const int qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
292 if (qp < kMinQpValue || qp > kMaxQpValue) {
293 RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream.";
294 return absl::nullopt;
295 }
296 return qp;
297 }
298
299 } // namespace webrtc
300