1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/video_coding/codecs/test/videocodec_test_stats_impl.h"
12
13 #include <algorithm>
14 #include <cmath>
15 #include <iterator>
16 #include <limits>
17 #include <numeric>
18
19 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/numerics/running_statistics.h"
22 #include "rtc_base/strings/string_builder.h"
23
24 namespace webrtc {
25 namespace test {
26
27 using FrameStatistics = VideoCodecTestStats::FrameStatistics;
28 using VideoStatistics = VideoCodecTestStats::VideoStatistics;
29
namespace {
// Threshold, in percent, that the relative difference between the measured
// and the target bitrate must stay below for the target bitrate to count as
// reached (see the `time_to_reach_target_bitrate_sec` computation).
constexpr int kMaxBitrateMismatchPercent = 20;
}  // namespace
33
34 VideoCodecTestStatsImpl::VideoCodecTestStatsImpl() = default;
35 VideoCodecTestStatsImpl::~VideoCodecTestStatsImpl() = default;
36
AddFrame(const FrameStatistics & frame_stat)37 void VideoCodecTestStatsImpl::AddFrame(const FrameStatistics& frame_stat) {
38 const size_t timestamp = frame_stat.rtp_timestamp;
39 const size_t layer_idx = frame_stat.spatial_idx;
40 RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) ==
41 rtp_timestamp_to_frame_num_[layer_idx].end());
42 rtp_timestamp_to_frame_num_[layer_idx][timestamp] = frame_stat.frame_number;
43 layer_stats_[layer_idx].push_back(frame_stat);
44 }
45
GetFrame(size_t frame_num,size_t layer_idx)46 FrameStatistics* VideoCodecTestStatsImpl::GetFrame(size_t frame_num,
47 size_t layer_idx) {
48 RTC_CHECK_LT(frame_num, layer_stats_[layer_idx].size());
49 return &layer_stats_[layer_idx][frame_num];
50 }
51
GetFrameWithTimestamp(size_t timestamp,size_t layer_idx)52 FrameStatistics* VideoCodecTestStatsImpl::GetFrameWithTimestamp(
53 size_t timestamp,
54 size_t layer_idx) {
55 RTC_DCHECK(rtp_timestamp_to_frame_num_[layer_idx].find(timestamp) !=
56 rtp_timestamp_to_frame_num_[layer_idx].end());
57
58 return GetFrame(rtp_timestamp_to_frame_num_[layer_idx][timestamp], layer_idx);
59 }
60
GetFrameStatistics()61 std::vector<FrameStatistics> VideoCodecTestStatsImpl::GetFrameStatistics() {
62 size_t capacity = 0;
63 for (const auto& layer_stat : layer_stats_) {
64 capacity += layer_stat.second.size();
65 }
66
67 std::vector<FrameStatistics> frame_statistics;
68 frame_statistics.reserve(capacity);
69 for (const auto& layer_stat : layer_stats_) {
70 std::copy(layer_stat.second.cbegin(), layer_stat.second.cend(),
71 std::back_inserter(frame_statistics));
72 }
73
74 return frame_statistics;
75 }
76
77 std::vector<VideoStatistics>
SliceAndCalcLayerVideoStatistic(size_t first_frame_num,size_t last_frame_num)78 VideoCodecTestStatsImpl::SliceAndCalcLayerVideoStatistic(
79 size_t first_frame_num,
80 size_t last_frame_num) {
81 std::vector<VideoStatistics> layer_stats;
82
83 size_t num_spatial_layers = 0;
84 size_t num_temporal_layers = 0;
85 GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers,
86 &num_temporal_layers);
87 RTC_CHECK_GT(num_spatial_layers, 0);
88 RTC_CHECK_GT(num_temporal_layers, 0);
89
90 for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers;
91 ++spatial_idx) {
92 for (size_t temporal_idx = 0; temporal_idx < num_temporal_layers;
93 ++temporal_idx) {
94 VideoStatistics layer_stat = SliceAndCalcVideoStatistic(
95 first_frame_num, last_frame_num, spatial_idx, temporal_idx, false);
96 layer_stats.push_back(layer_stat);
97 }
98 }
99
100 return layer_stats;
101 }
102
SliceAndCalcAggregatedVideoStatistic(size_t first_frame_num,size_t last_frame_num)103 VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcAggregatedVideoStatistic(
104 size_t first_frame_num,
105 size_t last_frame_num) {
106 size_t num_spatial_layers = 0;
107 size_t num_temporal_layers = 0;
108 GetNumberOfEncodedLayers(first_frame_num, last_frame_num, &num_spatial_layers,
109 &num_temporal_layers);
110 RTC_CHECK_GT(num_spatial_layers, 0);
111 RTC_CHECK_GT(num_temporal_layers, 0);
112
113 return SliceAndCalcVideoStatistic(first_frame_num, last_frame_num,
114 num_spatial_layers - 1,
115 num_temporal_layers - 1, true);
116 }
117
Size(size_t spatial_idx)118 size_t VideoCodecTestStatsImpl::Size(size_t spatial_idx) {
119 return layer_stats_[spatial_idx].size();
120 }
121
Clear()122 void VideoCodecTestStatsImpl::Clear() {
123 layer_stats_.clear();
124 rtp_timestamp_to_frame_num_.clear();
125 }
126
AggregateFrameStatistic(size_t frame_num,size_t spatial_idx,bool aggregate_independent_layers)127 FrameStatistics VideoCodecTestStatsImpl::AggregateFrameStatistic(
128 size_t frame_num,
129 size_t spatial_idx,
130 bool aggregate_independent_layers) {
131 FrameStatistics frame_stat = *GetFrame(frame_num, spatial_idx);
132 bool inter_layer_predicted = frame_stat.inter_layer_predicted;
133 while (spatial_idx-- > 0) {
134 if (aggregate_independent_layers || inter_layer_predicted) {
135 FrameStatistics* base_frame_stat = GetFrame(frame_num, spatial_idx);
136 frame_stat.length_bytes += base_frame_stat->length_bytes;
137 frame_stat.target_bitrate_kbps += base_frame_stat->target_bitrate_kbps;
138
139 inter_layer_predicted = base_frame_stat->inter_layer_predicted;
140 }
141 }
142
143 return frame_stat;
144 }
145
CalcLayerTargetBitrateKbps(size_t first_frame_num,size_t last_frame_num,size_t spatial_idx,size_t temporal_idx,bool aggregate_independent_layers)146 size_t VideoCodecTestStatsImpl::CalcLayerTargetBitrateKbps(
147 size_t first_frame_num,
148 size_t last_frame_num,
149 size_t spatial_idx,
150 size_t temporal_idx,
151 bool aggregate_independent_layers) {
152 size_t target_bitrate_kbps = 0;
153
154 // We don't know if superframe includes all required spatial layers because
155 // of possible frame drops. Run through all frames in specified range, find
156 // and return maximum target bitrate. Assume that target bitrate in frame
157 // statistic is specified per temporal layer.
158 for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
159 ++frame_num) {
160 FrameStatistics superframe = AggregateFrameStatistic(
161 frame_num, spatial_idx, aggregate_independent_layers);
162
163 if (superframe.temporal_idx <= temporal_idx) {
164 target_bitrate_kbps =
165 std::max(target_bitrate_kbps, superframe.target_bitrate_kbps);
166 }
167 }
168
169 RTC_DCHECK_GT(target_bitrate_kbps, 0);
170 return target_bitrate_kbps;
171 }
172
SliceAndCalcVideoStatistic(size_t first_frame_num,size_t last_frame_num,size_t spatial_idx,size_t temporal_idx,bool aggregate_independent_layers)173 VideoStatistics VideoCodecTestStatsImpl::SliceAndCalcVideoStatistic(
174 size_t first_frame_num,
175 size_t last_frame_num,
176 size_t spatial_idx,
177 size_t temporal_idx,
178 bool aggregate_independent_layers) {
179 VideoStatistics video_stat;
180
181 float buffer_level_bits = 0.0f;
182 webrtc_impl::RunningStatistics<float> buffer_level_sec;
183
184 webrtc_impl::RunningStatistics<size_t> key_frame_size_bytes;
185 webrtc_impl::RunningStatistics<size_t> delta_frame_size_bytes;
186
187 webrtc_impl::RunningStatistics<size_t> frame_encoding_time_us;
188 webrtc_impl::RunningStatistics<size_t> frame_decoding_time_us;
189
190 webrtc_impl::RunningStatistics<float> psnr_y;
191 webrtc_impl::RunningStatistics<float> psnr_u;
192 webrtc_impl::RunningStatistics<float> psnr_v;
193 webrtc_impl::RunningStatistics<float> psnr;
194 webrtc_impl::RunningStatistics<float> ssim;
195 webrtc_impl::RunningStatistics<int> qp;
196
197 size_t rtp_timestamp_first_frame = 0;
198 size_t rtp_timestamp_prev_frame = 0;
199
200 FrameStatistics last_successfully_decoded_frame(0, 0, 0);
201
202 const size_t target_bitrate_kbps =
203 CalcLayerTargetBitrateKbps(first_frame_num, last_frame_num, spatial_idx,
204 temporal_idx, aggregate_independent_layers);
205 const size_t target_bitrate_bps = 1000 * target_bitrate_kbps;
206 RTC_CHECK_GT(target_bitrate_kbps, 0); // We divide by `target_bitrate_kbps`.
207
208 for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
209 ++frame_num) {
210 FrameStatistics frame_stat = AggregateFrameStatistic(
211 frame_num, spatial_idx, aggregate_independent_layers);
212
213 float time_since_first_frame_sec =
214 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_first_frame) /
215 kVideoPayloadTypeFrequency;
216 float time_since_prev_frame_sec =
217 1.0f * (frame_stat.rtp_timestamp - rtp_timestamp_prev_frame) /
218 kVideoPayloadTypeFrequency;
219
220 if (frame_stat.temporal_idx > temporal_idx) {
221 continue;
222 }
223
224 buffer_level_bits -= time_since_prev_frame_sec * 1000 * target_bitrate_kbps;
225 buffer_level_bits = std::max(0.0f, buffer_level_bits);
226 buffer_level_bits += 8.0 * frame_stat.length_bytes;
227 buffer_level_sec.AddSample(buffer_level_bits /
228 (1000 * target_bitrate_kbps));
229
230 video_stat.length_bytes += frame_stat.length_bytes;
231
232 if (frame_stat.encoding_successful) {
233 ++video_stat.num_encoded_frames;
234
235 if (frame_stat.frame_type == VideoFrameType::kVideoFrameKey) {
236 key_frame_size_bytes.AddSample(frame_stat.length_bytes);
237 ++video_stat.num_key_frames;
238 } else {
239 delta_frame_size_bytes.AddSample(frame_stat.length_bytes);
240 }
241
242 frame_encoding_time_us.AddSample(frame_stat.encode_time_us);
243 qp.AddSample(frame_stat.qp);
244
245 video_stat.max_nalu_size_bytes = std::max(video_stat.max_nalu_size_bytes,
246 frame_stat.max_nalu_size_bytes);
247 }
248
249 if (frame_stat.decoding_successful) {
250 ++video_stat.num_decoded_frames;
251
252 video_stat.width = std::max(video_stat.width, frame_stat.decoded_width);
253 video_stat.height =
254 std::max(video_stat.height, frame_stat.decoded_height);
255
256 if (video_stat.num_decoded_frames > 1) {
257 if (last_successfully_decoded_frame.decoded_width !=
258 frame_stat.decoded_width ||
259 last_successfully_decoded_frame.decoded_height !=
260 frame_stat.decoded_height) {
261 ++video_stat.num_spatial_resizes;
262 }
263 }
264
265 frame_decoding_time_us.AddSample(frame_stat.decode_time_us);
266 last_successfully_decoded_frame = frame_stat;
267 }
268
269 if (frame_stat.quality_analysis_successful) {
270 psnr_y.AddSample(frame_stat.psnr_y);
271 psnr_u.AddSample(frame_stat.psnr_u);
272 psnr_v.AddSample(frame_stat.psnr_v);
273 psnr.AddSample(frame_stat.psnr);
274 ssim.AddSample(frame_stat.ssim);
275 }
276
277 if (video_stat.num_input_frames > 0) {
278 if (video_stat.time_to_reach_target_bitrate_sec == 0.0f) {
279 RTC_CHECK_GT(time_since_first_frame_sec, 0);
280 const float curr_kbps =
281 8.0 * video_stat.length_bytes / 1000 / time_since_first_frame_sec;
282 const float bitrate_mismatch_percent =
283 100 * std::fabs(curr_kbps - target_bitrate_kbps) /
284 target_bitrate_kbps;
285 if (bitrate_mismatch_percent < kMaxBitrateMismatchPercent) {
286 video_stat.time_to_reach_target_bitrate_sec =
287 time_since_first_frame_sec;
288 }
289 }
290 }
291
292 rtp_timestamp_prev_frame = frame_stat.rtp_timestamp;
293 if (video_stat.num_input_frames == 0) {
294 rtp_timestamp_first_frame = frame_stat.rtp_timestamp;
295 }
296
297 ++video_stat.num_input_frames;
298 }
299
300 const size_t num_frames = last_frame_num - first_frame_num + 1;
301 const size_t timestamp_delta =
302 GetFrame(first_frame_num + 1, spatial_idx)->rtp_timestamp -
303 GetFrame(first_frame_num, spatial_idx)->rtp_timestamp;
304 RTC_CHECK_GT(timestamp_delta, 0);
305 const float input_framerate_fps =
306 1.0 * kVideoPayloadTypeFrequency / timestamp_delta;
307 RTC_CHECK_GT(input_framerate_fps, 0);
308 const float duration_sec = num_frames / input_framerate_fps;
309
310 video_stat.target_bitrate_kbps = target_bitrate_kbps;
311 video_stat.input_framerate_fps = input_framerate_fps;
312
313 video_stat.spatial_idx = spatial_idx;
314 video_stat.temporal_idx = temporal_idx;
315
316 RTC_CHECK_GT(duration_sec, 0);
317 const float bitrate_bps = 8 * video_stat.length_bytes / duration_sec;
318 video_stat.bitrate_kbps = static_cast<size_t>((bitrate_bps + 500) / 1000);
319 video_stat.framerate_fps = video_stat.num_encoded_frames / duration_sec;
320
321 // http://bugs.webrtc.org/10400: On Windows, we only get millisecond
322 // granularity in the frame encode/decode timing measurements.
323 // So we need to softly avoid a div-by-zero here.
324 const float mean_encode_time_us =
325 frame_encoding_time_us.GetMean().value_or(0);
326 video_stat.enc_speed_fps = mean_encode_time_us > 0.0f
327 ? 1000000.0f / mean_encode_time_us
328 : std::numeric_limits<float>::max();
329 const float mean_decode_time_us =
330 frame_decoding_time_us.GetMean().value_or(0);
331 video_stat.dec_speed_fps = mean_decode_time_us > 0.0f
332 ? 1000000.0f / mean_decode_time_us
333 : std::numeric_limits<float>::max();
334
335 video_stat.avg_encode_latency_sec =
336 frame_encoding_time_us.GetMean().value_or(0) / 1000000.0f;
337 video_stat.max_encode_latency_sec =
338 frame_encoding_time_us.GetMax().value_or(0) / 1000000.0f;
339
340 video_stat.avg_decode_latency_sec =
341 frame_decoding_time_us.GetMean().value_or(0) / 1000000.0f;
342 video_stat.max_decode_latency_sec =
343 frame_decoding_time_us.GetMax().value_or(0) / 1000000.0f;
344
345 auto MaxDelaySec = [target_bitrate_kbps](
346 const webrtc_impl::RunningStatistics<size_t>& stats) {
347 return 8 * stats.GetMax().value_or(0) / 1000 / target_bitrate_kbps;
348 };
349
350 video_stat.avg_delay_sec = buffer_level_sec.GetMean().value_or(0);
351 video_stat.max_key_frame_delay_sec = MaxDelaySec(key_frame_size_bytes);
352 video_stat.max_delta_frame_delay_sec = MaxDelaySec(delta_frame_size_bytes);
353
354 video_stat.avg_bitrate_mismatch_pct =
355 100 * (bitrate_bps - target_bitrate_bps) / target_bitrate_bps;
356 video_stat.avg_framerate_mismatch_pct =
357 100 * (video_stat.framerate_fps - input_framerate_fps) /
358 input_framerate_fps;
359
360 video_stat.avg_key_frame_size_bytes =
361 key_frame_size_bytes.GetMean().value_or(0);
362 video_stat.avg_delta_frame_size_bytes =
363 delta_frame_size_bytes.GetMean().value_or(0);
364 video_stat.avg_qp = qp.GetMean().value_or(0);
365
366 video_stat.avg_psnr_y = psnr_y.GetMean().value_or(0);
367 video_stat.avg_psnr_u = psnr_u.GetMean().value_or(0);
368 video_stat.avg_psnr_v = psnr_v.GetMean().value_or(0);
369 video_stat.avg_psnr = psnr.GetMean().value_or(0);
370 video_stat.min_psnr =
371 psnr.GetMin().value_or(std::numeric_limits<float>::max());
372 video_stat.avg_ssim = ssim.GetMean().value_or(0);
373 video_stat.min_ssim =
374 ssim.GetMin().value_or(std::numeric_limits<float>::max());
375
376 return video_stat;
377 }
378
GetNumberOfEncodedLayers(size_t first_frame_num,size_t last_frame_num,size_t * num_encoded_spatial_layers,size_t * num_encoded_temporal_layers)379 void VideoCodecTestStatsImpl::GetNumberOfEncodedLayers(
380 size_t first_frame_num,
381 size_t last_frame_num,
382 size_t* num_encoded_spatial_layers,
383 size_t* num_encoded_temporal_layers) {
384 *num_encoded_spatial_layers = 0;
385 *num_encoded_temporal_layers = 0;
386
387 const size_t num_spatial_layers = layer_stats_.size();
388
389 for (size_t frame_num = first_frame_num; frame_num <= last_frame_num;
390 ++frame_num) {
391 for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers;
392 ++spatial_idx) {
393 FrameStatistics* frame_stat = GetFrame(frame_num, spatial_idx);
394 if (frame_stat->encoding_successful) {
395 *num_encoded_spatial_layers =
396 std::max(*num_encoded_spatial_layers, frame_stat->spatial_idx + 1);
397 *num_encoded_temporal_layers = std::max(*num_encoded_temporal_layers,
398 frame_stat->temporal_idx + 1);
399 }
400 }
401 }
402 }
403
404 } // namespace test
405 } // namespace webrtc
406