xref: /aosp_15_r20/external/webrtc/common_video/h264/h264_bitstream_parser.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "common_video/h264/h264_bitstream_parser.h"
11 
12 #include <stdlib.h>
13 
14 #include <cstdint>
15 #include <vector>
16 
17 #include "common_video/h264/h264_common.h"
18 #include "rtc_base/bitstream_reader.h"
19 #include "rtc_base/logging.h"
20 
21 namespace webrtc {
22 namespace {
23 
// Inclusive bounds of the QP values that GetLastSliceQp() is willing to
// report; anything outside this interval is treated as a parse error.
constexpr int kMaxQpValue{51};
constexpr int kMinQpValue{0};
// Largest magnitude accepted for a parsed slice_qp_delta. A delta whose
// absolute value exceeds this makes the slice header parse fail.
constexpr int kMaxAbsQpDeltaValue{51};
27 
28 }  // namespace
29 
// The constructor and destructor are explicitly defaulted here, out of line,
// instead of inside the class definition. Neither performs any work beyond
// what the compiler generates for the members.
30 H264BitstreamParser::H264BitstreamParser() = default;
31 H264BitstreamParser::~H264BitstreamParser() = default;
32 
ParseNonParameterSetNalu(const uint8_t * source,size_t source_length,uint8_t nalu_type)33 H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu(
34     const uint8_t* source,
35     size_t source_length,
36     uint8_t nalu_type) {
37   if (!sps_ || !pps_)
38     return kInvalidStream;
39 
40   last_slice_qp_delta_ = absl::nullopt;
41   const std::vector<uint8_t> slice_rbsp =
42       H264::ParseRbsp(source, source_length);
43   if (slice_rbsp.size() < H264::kNaluTypeSize)
44     return kInvalidStream;
45 
46   BitstreamReader slice_reader(slice_rbsp);
47   slice_reader.ConsumeBits(H264::kNaluTypeSize * 8);
48 
49   // Check to see if this is an IDR slice, which has an extra field to parse
50   // out.
51   bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr;
52   uint8_t nal_ref_idc = (source[0] & 0x60) >> 5;
53 
54   // first_mb_in_slice: ue(v)
55   slice_reader.ReadExponentialGolomb();
56   // slice_type: ue(v)
57   uint32_t slice_type = slice_reader.ReadExponentialGolomb();
58   // slice_type's 5..9 range is used to indicate that all slices of a picture
59   // have the same value of slice_type % 5, we don't care about that, so we map
60   // to the corresponding 0..4 range.
61   slice_type %= 5;
62   // pic_parameter_set_id: ue(v)
63   slice_reader.ReadExponentialGolomb();
64   if (sps_->separate_colour_plane_flag == 1) {
65     // colour_plane_id
66     slice_reader.ConsumeBits(2);
67   }
68   // frame_num: u(v)
69   // Represented by log2_max_frame_num bits.
70   slice_reader.ConsumeBits(sps_->log2_max_frame_num);
71   bool field_pic_flag = false;
72   if (sps_->frame_mbs_only_flag == 0) {
73     // field_pic_flag: u(1)
74     field_pic_flag = slice_reader.Read<bool>();
75     if (field_pic_flag) {
76       // bottom_field_flag: u(1)
77       slice_reader.ConsumeBits(1);
78     }
79   }
80   if (is_idr) {
81     // idr_pic_id: ue(v)
82     slice_reader.ReadExponentialGolomb();
83   }
84   // pic_order_cnt_lsb: u(v)
85   // Represented by sps_.log2_max_pic_order_cnt_lsb bits.
86   if (sps_->pic_order_cnt_type == 0) {
87     slice_reader.ConsumeBits(sps_->log2_max_pic_order_cnt_lsb);
88     if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
89       // delta_pic_order_cnt_bottom: se(v)
90       slice_reader.ReadExponentialGolomb();
91     }
92   }
93   if (sps_->pic_order_cnt_type == 1 &&
94       !sps_->delta_pic_order_always_zero_flag) {
95     // delta_pic_order_cnt[0]: se(v)
96     slice_reader.ReadExponentialGolomb();
97     if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
98       // delta_pic_order_cnt[1]: se(v)
99       slice_reader.ReadExponentialGolomb();
100     }
101   }
102   if (pps_->redundant_pic_cnt_present_flag) {
103     // redundant_pic_cnt: ue(v)
104     slice_reader.ReadExponentialGolomb();
105   }
106   if (slice_type == H264::SliceType::kB) {
107     // direct_spatial_mv_pred_flag: u(1)
108     slice_reader.ConsumeBits(1);
109   }
110   switch (slice_type) {
111     case H264::SliceType::kP:
112     case H264::SliceType::kB:
113     case H264::SliceType::kSp:
114       // num_ref_idx_active_override_flag: u(1)
115       if (slice_reader.Read<bool>()) {
116         // num_ref_idx_l0_active_minus1: ue(v)
117         slice_reader.ReadExponentialGolomb();
118         if (slice_type == H264::SliceType::kB) {
119           // num_ref_idx_l1_active_minus1: ue(v)
120           slice_reader.ReadExponentialGolomb();
121         }
122       }
123       break;
124     default:
125       break;
126   }
127   if (!slice_reader.Ok()) {
128     return kInvalidStream;
129   }
130   // assume nal_unit_type != 20 && nal_unit_type != 21:
131   if (nalu_type == 20 || nalu_type == 21) {
132     RTC_LOG(LS_ERROR) << "Unsupported nal unit type.";
133     return kUnsupportedStream;
134   }
135   // if (nal_unit_type == 20 || nal_unit_type == 21)
136   //   ref_pic_list_mvc_modification()
137   // else
138   {
139     // ref_pic_list_modification():
140     // `slice_type` checks here don't use named constants as they aren't named
141     // in the spec for this segment. Keeping them consistent makes it easier to
142     // verify that they are both the same.
143     if (slice_type % 5 != 2 && slice_type % 5 != 4) {
144       // ref_pic_list_modification_flag_l0: u(1)
145       if (slice_reader.Read<bool>()) {
146         uint32_t modification_of_pic_nums_idc;
147         do {
148           // modification_of_pic_nums_idc: ue(v)
149           modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb();
150           if (modification_of_pic_nums_idc == 0 ||
151               modification_of_pic_nums_idc == 1) {
152             // abs_diff_pic_num_minus1: ue(v)
153             slice_reader.ReadExponentialGolomb();
154           } else if (modification_of_pic_nums_idc == 2) {
155             // long_term_pic_num: ue(v)
156             slice_reader.ReadExponentialGolomb();
157           }
158         } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok());
159       }
160     }
161     if (slice_type % 5 == 1) {
162       // ref_pic_list_modification_flag_l1: u(1)
163       if (slice_reader.Read<bool>()) {
164         uint32_t modification_of_pic_nums_idc;
165         do {
166           // modification_of_pic_nums_idc: ue(v)
167           modification_of_pic_nums_idc = slice_reader.ReadExponentialGolomb();
168           if (modification_of_pic_nums_idc == 0 ||
169               modification_of_pic_nums_idc == 1) {
170             // abs_diff_pic_num_minus1: ue(v)
171             slice_reader.ReadExponentialGolomb();
172           } else if (modification_of_pic_nums_idc == 2) {
173             // long_term_pic_num: ue(v)
174             slice_reader.ReadExponentialGolomb();
175           }
176         } while (modification_of_pic_nums_idc != 3 && slice_reader.Ok());
177       }
178     }
179   }
180   if (!slice_reader.Ok()) {
181     return kInvalidStream;
182   }
183   // TODO(pbos): Do we need support for pred_weight_table()?
184   if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP ||
185                                     slice_type == H264::SliceType::kSp)) ||
186       (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) {
187     RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported.";
188     return kUnsupportedStream;
189   }
190   // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) ||
191   //    (weighted_bipred_idc == 1 && slice_type == B)) {
192   //  pred_weight_table()
193   // }
194   if (nal_ref_idc != 0) {
195     // dec_ref_pic_marking():
196     if (is_idr) {
197       // no_output_of_prior_pics_flag: u(1)
198       // long_term_reference_flag: u(1)
199       slice_reader.ConsumeBits(2);
200     } else {
201       // adaptive_ref_pic_marking_mode_flag: u(1)
202       if (slice_reader.Read<bool>()) {
203         uint32_t memory_management_control_operation;
204         do {
205           // memory_management_control_operation: ue(v)
206           memory_management_control_operation =
207               slice_reader.ReadExponentialGolomb();
208           if (memory_management_control_operation == 1 ||
209               memory_management_control_operation == 3) {
210             // difference_of_pic_nums_minus1: ue(v)
211             slice_reader.ReadExponentialGolomb();
212           }
213           if (memory_management_control_operation == 2) {
214             // long_term_pic_num: ue(v)
215             slice_reader.ReadExponentialGolomb();
216           }
217           if (memory_management_control_operation == 3 ||
218               memory_management_control_operation == 6) {
219             // long_term_frame_idx: ue(v)
220             slice_reader.ReadExponentialGolomb();
221           }
222           if (memory_management_control_operation == 4) {
223             // max_long_term_frame_idx_plus1: ue(v)
224             slice_reader.ReadExponentialGolomb();
225           }
226         } while (memory_management_control_operation != 0 && slice_reader.Ok());
227       }
228     }
229   }
230   if (pps_->entropy_coding_mode_flag && slice_type != H264::SliceType::kI &&
231       slice_type != H264::SliceType::kSi) {
232     // cabac_init_idc: ue(v)
233     slice_reader.ReadExponentialGolomb();
234   }
235 
236   int last_slice_qp_delta = slice_reader.ReadSignedExponentialGolomb();
237   if (!slice_reader.Ok()) {
238     return kInvalidStream;
239   }
240   if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) {
241     // Something has gone wrong, and the parsed value is invalid.
242     RTC_LOG(LS_WARNING) << "Parsed QP value out of range.";
243     return kInvalidStream;
244   }
245 
246   last_slice_qp_delta_ = last_slice_qp_delta;
247   return kOk;
248 }
249 
ParseSlice(const uint8_t * slice,size_t length)250 void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
251   H264::NaluType nalu_type = H264::ParseNaluType(slice[0]);
252   switch (nalu_type) {
253     case H264::NaluType::kSps: {
254       sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize,
255                                  length - H264::kNaluTypeSize);
256       if (!sps_)
257         RTC_DLOG(LS_WARNING) << "Unable to parse SPS from H264 bitstream.";
258       break;
259     }
260     case H264::NaluType::kPps: {
261       pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize,
262                                  length - H264::kNaluTypeSize);
263       if (!pps_)
264         RTC_DLOG(LS_WARNING) << "Unable to parse PPS from H264 bitstream.";
265       break;
266     }
267     case H264::NaluType::kAud:
268     case H264::NaluType::kSei:
269     case H264::NaluType::kPrefix:
270       break;  // Ignore these nalus, as we don't care about their contents.
271     default:
272       Result res = ParseNonParameterSetNalu(slice, length, nalu_type);
273       if (res != kOk)
274         RTC_DLOG(LS_INFO) << "Failed to parse bitstream. Error: " << res;
275       break;
276   }
277 }
278 
ParseBitstream(rtc::ArrayView<const uint8_t> bitstream)279 void H264BitstreamParser::ParseBitstream(
280     rtc::ArrayView<const uint8_t> bitstream) {
281   std::vector<H264::NaluIndex> nalu_indices =
282       H264::FindNaluIndices(bitstream.data(), bitstream.size());
283   for (const H264::NaluIndex& index : nalu_indices)
284     ParseSlice(bitstream.data() + index.payload_start_offset,
285                index.payload_size);
286 }
287 
GetLastSliceQp() const288 absl::optional<int> H264BitstreamParser::GetLastSliceQp() const {
289   if (!last_slice_qp_delta_ || !pps_)
290     return absl::nullopt;
291   const int qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
292   if (qp < kMinQpValue || qp > kMaxQpValue) {
293     RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream.";
294     return absl::nullopt;
295   }
296   return qp;
297 }
298 
299 }  // namespace webrtc
300