1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "common_video/h264/sps_parser.h"
12
13 #include "common_video/h264/h264_common.h"
14 #include "rtc_base/arraysize.h"
15 #include "rtc_base/bit_buffer.h"
16 #include "rtc_base/buffer.h"
17 #include "test/gtest.h"
18
19 namespace webrtc {
20
21 // Example SPS can be generated with ffmpeg. Here's an example set of commands,
22 // runnable on OS X:
23 // 1) Generate a video, from the camera:
24 // ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov
25 //
26 // 2) Scale the video to the desired size:
27 // ffmpeg -i camera.mov -vf scale=640x360 scaled.mov
28 //
29 // 3) Get just the H.264 bitstream in AnnexB:
30 // ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264
31 //
32 // 4) Open out.h264 and find the SPS, generally everything between the first
33 // two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67,
34 // which should be stripped out before being passed to the parser.
35
// Capacity, in bytes, of the scratch array that GenerateFakeSps() fills with
// the raw SPS bits before handing them to H264::WriteRbsp().
static constexpr size_t kSpsBufferMaxSize = 256;
37
38 // Generates a fake SPS with basically everything empty but the width/height.
39 // Pass in a buffer of at least kSpsBufferMaxSize.
40 // The fake SPS that this generates also always has at least one emulation byte
41 // at offset 2, since the first two bytes are always 0, and has a 0x3 as the
42 // level_idc, to make sure the parser doesn't eat all 0x3 bytes.
GenerateFakeSps(uint16_t width,uint16_t height,int id,uint32_t log2_max_frame_num_minus4,uint32_t log2_max_pic_order_cnt_lsb_minus4,rtc::Buffer * out_buffer)43 void GenerateFakeSps(uint16_t width,
44 uint16_t height,
45 int id,
46 uint32_t log2_max_frame_num_minus4,
47 uint32_t log2_max_pic_order_cnt_lsb_minus4,
48 rtc::Buffer* out_buffer) {
49 uint8_t rbsp[kSpsBufferMaxSize] = {0};
50 rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
51 // Profile byte.
52 writer.WriteUInt8(0);
53 // Constraint sets and reserved zero bits.
54 writer.WriteUInt8(0);
55 // level_idc.
56 writer.WriteUInt8(0x3u);
57 // seq_paramter_set_id.
58 writer.WriteExponentialGolomb(id);
59 // Profile is not special, so we skip all the chroma format settings.
60
61 // Now some bit magic.
62 // log2_max_frame_num_minus4: ue(v).
63 writer.WriteExponentialGolomb(log2_max_frame_num_minus4);
64 // pic_order_cnt_type: ue(v). 0 is the type we want.
65 writer.WriteExponentialGolomb(0);
66 // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine.
67 writer.WriteExponentialGolomb(log2_max_pic_order_cnt_lsb_minus4);
68 // max_num_ref_frames: ue(v). 0 is fine.
69 writer.WriteExponentialGolomb(0);
70 // gaps_in_frame_num_value_allowed_flag: u(1).
71 writer.WriteBits(0, 1);
72 // Next are width/height. First, calculate the mbs/map_units versions.
73 uint16_t width_in_mbs_minus1 = (width + 15) / 16 - 1;
74
75 // For the height, we're going to define frame_mbs_only_flag, so we need to
76 // divide by 2. See the parser for the full calculation.
77 uint16_t height_in_map_units_minus1 = ((height + 15) / 16 - 1) / 2;
78 // Write each as ue(v).
79 writer.WriteExponentialGolomb(width_in_mbs_minus1);
80 writer.WriteExponentialGolomb(height_in_map_units_minus1);
81 // frame_mbs_only_flag: u(1). Needs to be false.
82 writer.WriteBits(0, 1);
83 // mb_adaptive_frame_field_flag: u(1).
84 writer.WriteBits(0, 1);
85 // direct_8x8_inferene_flag: u(1).
86 writer.WriteBits(0, 1);
87 // frame_cropping_flag: u(1). 1, so we can supply crop.
88 writer.WriteBits(1, 1);
89 // Now we write the left/right/top/bottom crop. For simplicity, we'll put all
90 // the crop at the left/top.
91 // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values.
92 // Left/right.
93 writer.WriteExponentialGolomb(((16 - (width % 16)) % 16) / 2);
94 writer.WriteExponentialGolomb(0);
95 // Top/bottom.
96 writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2);
97 writer.WriteExponentialGolomb(0);
98
99 // vui_parameters_present_flag: u(1)
100 writer.WriteBits(0, 1);
101
102 // Get the number of bytes written (including the last partial byte).
103 size_t byte_count, bit_offset;
104 writer.GetCurrentOffset(&byte_count, &bit_offset);
105 if (bit_offset > 0) {
106 byte_count++;
107 }
108
109 out_buffer->Clear();
110 H264::WriteRbsp(rbsp, byte_count, out_buffer);
111 }
112
TEST(H264SpsParserTest, TestSampleSPSHdLandscape) {
  // 1280x720 camera-capture SPS produced by ffmpeg on OS X. It contains
  // emulation bytes; no cropping is involved.
  const uint8_t kSps[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
                          0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
                          0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
  const absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(kSps, arraysize(kSps));
  // Stop here on a parse failure so the empty optional is never dereferenced.
  ASSERT_TRUE(parsed.has_value());
  const SpsParser::SpsState& state = *parsed;
  EXPECT_EQ(1280u, state.width);
  EXPECT_EQ(720u, state.height);
}
125
TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) {
  // 640x360 camera-capture SPS produced by ffmpeg on OS X. It contains
  // emulation bytes and cropping, because 360 is not a multiple of 16.
  const uint8_t kSps[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
                          0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
                          0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
  const absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(kSps, arraysize(kSps));
  // Stop here on a parse failure so the empty optional is never dereferenced.
  ASSERT_TRUE(parsed.has_value());
  const SpsParser::SpsState& state = *parsed;
  EXPECT_EQ(640u, state.width);
  EXPECT_EQ(360u, state.height);
}
138
TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) {
  // 200x400 camera-capture SPS produced by ffmpeg on OS X. Both horizontal
  // and vertical crop are present, since neither dimension is a multiple of
  // 16.
  const uint8_t kSps[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
                          0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
                          0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
  const absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(kSps, arraysize(kSps));
  // Stop here on a parse failure so the empty optional is never dereferenced.
  ASSERT_TRUE(parsed.has_value());
  const SpsParser::SpsState& state = *parsed;
  EXPECT_EQ(200u, state.width);
  EXPECT_EQ(400u, state.height);
}
151
TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
  // Round-trips a generated 320x180 SPS with id 1 through the parser.
  rtc::Buffer sps_bytes;
  GenerateFakeSps(320u, 180u, 1, 0, 0, &sps_bytes);
  const absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(sps_bytes.data(), sps_bytes.size());
  // Stop here on a parse failure so the empty optional is never dereferenced.
  ASSERT_TRUE(parsed.has_value());
  const SpsParser::SpsState& state = *parsed;
  EXPECT_EQ(320u, state.width);
  EXPECT_EQ(180u, state.height);
  EXPECT_EQ(1u, state.id);
}
162
TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
  // Round-trips a generated 156x122 SPS with id 2 through the parser; neither
  // dimension is a multiple of 16, so the generator emits crop values.
  rtc::Buffer sps_bytes;
  GenerateFakeSps(156u, 122u, 2, 0, 0, &sps_bytes);
  const absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(sps_bytes.data(), sps_bytes.size());
  // Stop here on a parse failure so the empty optional is never dereferenced.
  ASSERT_TRUE(parsed.has_value());
  const SpsParser::SpsState& state = *parsed;
  EXPECT_EQ(156u, state.width);
  EXPECT_EQ(122u, state.height);
  EXPECT_EQ(2u, state.id);
}
173
TEST(H264SpsParserTest, TestSampleSPSWithScalingLists) {
  // SPS taken from a 1920x1080 video. It carries scaling lists as well as
  // vertical cropping.
  const uint8_t kSps[] = {0x64, 0x00, 0x2a, 0xad, 0x84, 0x01, 0x0c, 0x20,
                          0x08, 0x61, 0x00, 0x43, 0x08, 0x02, 0x18, 0x40,
                          0x10, 0xc2, 0x00, 0x84, 0x3b, 0x50, 0x3c, 0x01,
                          0x13, 0xf2, 0xcd, 0xc0, 0x40, 0x40, 0x50, 0x00,
                          0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0xe8, 0x40};
  const absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(kSps, arraysize(kSps));
  // Stop here on a parse failure so the empty optional is never dereferenced.
  ASSERT_TRUE(parsed.has_value());
  const SpsParser::SpsState& state = *parsed;
  EXPECT_EQ(1920u, state.width);
  EXPECT_EQ(1080u, state.height);
}
187
TEST(H264SpsParserTest, TestLog2MaxFrameNumMinus4) {
  rtc::Buffer sps_bytes;

  // log2_max_frame_num_minus4 == 0 is expected to be reported as 4.
  GenerateFakeSps(320u, 180u, 1, 0, 0, &sps_bytes);
  absl::optional<SpsParser::SpsState> parsed =
      SpsParser::ParseSps(sps_bytes.data(), sps_bytes.size());
  ASSERT_TRUE(parsed.has_value());
  EXPECT_EQ(320u, parsed->width);
  EXPECT_EQ(180u, parsed->height);
  EXPECT_EQ(1u, parsed->id);
  EXPECT_EQ(4u, parsed->log2_max_frame_num);

  // log2_max_frame_num_minus4 == 28 is still accepted and reported as 32.
  GenerateFakeSps(320u, 180u, 1, 28, 0, &sps_bytes);
  parsed = SpsParser::ParseSps(sps_bytes.data(), sps_bytes.size());
  ASSERT_TRUE(parsed.has_value());
  EXPECT_EQ(320u, parsed->width);
  EXPECT_EQ(180u, parsed->height);
  EXPECT_EQ(1u, parsed->id);
  EXPECT_EQ(32u, parsed->log2_max_frame_num);

  // log2_max_frame_num_minus4 == 29 is expected to be rejected.
  GenerateFakeSps(320u, 180u, 1, 29, 0, &sps_bytes);
  EXPECT_FALSE(SpsParser::ParseSps(sps_bytes.data(), sps_bytes.size()));
}
210
TEST(H264SpsParserTest, TestLog2MaxPicOrderCntMinus4) {
  rtc::Buffer buffer;

  // log2_max_pic_order_cnt_lsb_minus4 == 0 is expected to be reported as 4.
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  absl::optional<SpsParser::SpsState> sps =
      SpsParser::ParseSps(buffer.data(), buffer.size());
  ASSERT_TRUE(sps.has_value());
  EXPECT_EQ(320u, sps->width);
  EXPECT_EQ(180u, sps->height);
  EXPECT_EQ(1u, sps->id);
  EXPECT_EQ(4u, sps->log2_max_pic_order_cnt_lsb);

  // log2_max_pic_order_cnt_lsb_minus4 == 28 is still accepted and reported as
  // 32. The check is fatal (ASSERT, not EXPECT) so that a failed parse aborts
  // the test instead of dereferencing an empty optional below.
  GenerateFakeSps(320u, 180u, 1, 0, 28, &buffer);
  sps = SpsParser::ParseSps(buffer.data(), buffer.size());
  ASSERT_TRUE(sps.has_value());
  EXPECT_EQ(320u, sps->width);
  EXPECT_EQ(180u, sps->height);
  EXPECT_EQ(1u, sps->id);
  EXPECT_EQ(32u, sps->log2_max_pic_order_cnt_lsb);

  // log2_max_pic_order_cnt_lsb_minus4 == 29 is expected to be rejected.
  GenerateFakeSps(320u, 180u, 1, 0, 29, &buffer);
  EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size()));
}
233
234 } // namespace webrtc
235