1 /*
2 * Copyright (c) 2010 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "media/base/video_adapter.h"
12
13 #include <algorithm>
14 #include <cmath>
15 #include <cstdlib>
16 #include <limits>
17 #include <utility>
18
19 #include "absl/types/optional.h"
20 #include "media/base/video_common.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23 #include "rtc_base/strings/string_builder.h"
24 #include "rtc_base/time_utils.h"
25 #include "system_wrappers/include/field_trial.h"
26
27 namespace {
28
29 struct Fraction {
30 int numerator;
31 int denominator;
32
DivideByGcd__anon4cc909e10111::Fraction33 void DivideByGcd() {
34 int g = cricket::GreatestCommonDivisor(numerator, denominator);
35 numerator /= g;
36 denominator /= g;
37 }
38
39 // Determines number of output pixels if both width and height of an input of
40 // `input_pixels` pixels is scaled with the fraction numerator / denominator.
scale_pixel_count__anon4cc909e10111::Fraction41 int scale_pixel_count(int input_pixels) {
42 return (numerator * numerator * input_pixels) / (denominator * denominator);
43 }
44 };
45
// Rounds `value_to_round` up to the nearest multiple of `multiple`. If that
// result would exceed `max_value`, the largest multiple of `multiple` that is
// not above `max_value` is returned instead.
int roundUp(int value_to_round, int multiple, int max_value) {
  const int ceil_quotient = (value_to_round + multiple - 1) / multiple;
  const int rounded_up = ceil_quotient * multiple;
  if (rounded_up > max_value) {
    // Rounding up overshoots the limit; fall back to rounding the limit down.
    return (max_value / multiple) * multiple;
  }
  return rounded_up;
}
54
55 // Generates a scale factor that makes `input_pixels` close to `target_pixels`,
56 // but no higher than `max_pixels`.
FindScale(int input_width,int input_height,int target_pixels,int max_pixels,bool variable_start_scale_factor)57 Fraction FindScale(int input_width,
58 int input_height,
59 int target_pixels,
60 int max_pixels,
61 bool variable_start_scale_factor) {
62 // This function only makes sense for a positive target.
63 RTC_DCHECK_GT(target_pixels, 0);
64 RTC_DCHECK_GT(max_pixels, 0);
65 RTC_DCHECK_GE(max_pixels, target_pixels);
66
67 const int input_pixels = input_width * input_height;
68
69 // Don't scale up original.
70 if (target_pixels >= input_pixels)
71 return Fraction{1, 1};
72
73 Fraction current_scale = Fraction{1, 1};
74 Fraction best_scale = Fraction{1, 1};
75
76 if (variable_start_scale_factor) {
77 // Start scaling down by 2/3 depending on `input_width` and `input_height`.
78 if (input_width % 3 == 0 && input_height % 3 == 0) {
79 // 2/3 (then alternates 3/4, 2/3, 3/4,...).
80 current_scale = Fraction{6, 6};
81 }
82 if (input_width % 9 == 0 && input_height % 9 == 0) {
83 // 2/3, 2/3 (then alternates 3/4, 2/3, 3/4,...).
84 current_scale = Fraction{36, 36};
85 }
86 }
87
88 // The minimum (absolute) difference between the number of output pixels and
89 // the target pixel count.
90 int min_pixel_diff = std::numeric_limits<int>::max();
91 if (input_pixels <= max_pixels) {
92 // Start condition for 1/1 case, if it is less than max.
93 min_pixel_diff = std::abs(input_pixels - target_pixels);
94 }
95
96 // Alternately scale down by 3/4 and 2/3. This results in fractions which are
97 // effectively scalable. For instance, starting at 1280x720 will result in
98 // the series (3/4) => 960x540, (1/2) => 640x360, (3/8) => 480x270,
99 // (1/4) => 320x180, (3/16) => 240x125, (1/8) => 160x90.
100 while (current_scale.scale_pixel_count(input_pixels) > target_pixels) {
101 if (current_scale.numerator % 3 == 0 &&
102 current_scale.denominator % 2 == 0) {
103 // Multiply by 2/3.
104 current_scale.numerator /= 3;
105 current_scale.denominator /= 2;
106 } else {
107 // Multiply by 3/4.
108 current_scale.numerator *= 3;
109 current_scale.denominator *= 4;
110 }
111
112 int output_pixels = current_scale.scale_pixel_count(input_pixels);
113 if (output_pixels <= max_pixels) {
114 int diff = std::abs(target_pixels - output_pixels);
115 if (diff < min_pixel_diff) {
116 min_pixel_diff = diff;
117 best_scale = current_scale;
118 }
119 }
120 }
121 best_scale.DivideByGcd();
122
123 return best_scale;
124 }
125
// Returns `in` with the two members of the pair exchanged, or nullopt when
// `in` holds no value.
absl::optional<std::pair<int, int>> Swap(
    const absl::optional<std::pair<int, int>>& in) {
  if (in.has_value()) {
    return std::make_pair(in->second, in->first);
  }
  return absl::nullopt;
}
133
134 } // namespace
135
136 namespace cricket {
137
// Creates an adapter whose resolution alignment initially equals
// `source_resolution_alignment`. All counters and the remembered previous
// output size start at zero. The pixel-count and frame-rate requests start at
// INT_MAX, which the rest of this file treats as "no limit requested".
VideoAdapter::VideoAdapter(int source_resolution_alignment)
    : frames_in_(0),
      frames_out_(0),
      frames_scaled_(0),
      adaption_changes_(0),
      previous_width_(0),
      previous_height_(0),
      // Enabled unless the field trial explicitly disables it.
      variable_start_scale_factor_(!webrtc::field_trial::IsDisabled(
          "WebRTC-Video-VariableStartScaleFactor")),
      source_resolution_alignment_(source_resolution_alignment),
      // Starts at the source alignment; OnSinkWants() recomputes it.
      resolution_alignment_(source_resolution_alignment),
      resolution_request_target_pixel_count_(std::numeric_limits<int>::max()),
      resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
      max_framerate_request_(std::numeric_limits<int>::max()) {}
152
VideoAdapter()153 VideoAdapter::VideoAdapter() : VideoAdapter(1) {}
154
~VideoAdapter()155 VideoAdapter::~VideoAdapter() {}
156
DropFrame(int64_t in_timestamp_ns)157 bool VideoAdapter::DropFrame(int64_t in_timestamp_ns) {
158 int max_fps = max_framerate_request_;
159 if (output_format_request_.max_fps)
160 max_fps = std::min(max_fps, *output_format_request_.max_fps);
161
162 framerate_controller_.SetMaxFramerate(max_fps);
163 return framerate_controller_.ShouldDropFrame(in_timestamp_ns);
164 }
165
// Decides whether an incoming `in_width` x `in_height` frame with timestamp
// `in_timestamp_ns` should be forwarded and, if so, how it should be cropped
// and scaled.
//
// Returns false if the frame should be dropped; in that case none of the
// output pointers are written. Returns true otherwise, with
// `*cropped_width` / `*cropped_height` set to the size the input should be
// cropped to and `*out_width` / `*out_height` set to the size the cropped
// frame should be scaled to.
//
// Holds `mutex_` for the whole call and updates the frame counters and the
// remembered previous output size.
bool VideoAdapter::AdaptFrameResolution(int in_width,
                                        int in_height,
                                        int64_t in_timestamp_ns,
                                        int* cropped_width,
                                        int* cropped_height,
                                        int* out_width,
                                        int* out_height) {
  webrtc::MutexLock lock(&mutex_);
  ++frames_in_;

  // The max output pixel count is the minimum of the requests from
  // OnOutputFormatRequest and OnSinkWants.
  int max_pixel_count = resolution_request_max_pixel_count_;

  // Select target aspect ratio and max pixel count depending on input frame
  // orientation.
  absl::optional<std::pair<int, int>> target_aspect_ratio;
  if (in_width > in_height) {
    target_aspect_ratio = output_format_request_.target_landscape_aspect_ratio;
    if (output_format_request_.max_landscape_pixel_count)
      max_pixel_count = std::min(
          max_pixel_count, *output_format_request_.max_landscape_pixel_count);
  } else {
    // Square inputs are handled with the portrait settings.
    target_aspect_ratio = output_format_request_.target_portrait_aspect_ratio;
    if (output_format_request_.max_portrait_pixel_count)
      max_pixel_count = std::min(
          max_pixel_count, *output_format_request_.max_portrait_pixel_count);
  }

  // The target can never exceed the hard maximum.
  int target_pixel_count =
      std::min(resolution_request_target_pixel_count_, max_pixel_count);

  // Drop the input frame if necessary. DropFrame() is only consulted when the
  // pixel limit itself does not already force a drop.
  if (max_pixel_count <= 0 || DropFrame(in_timestamp_ns)) {
    // Show VAdapt log every 90 frames dropped. (3 seconds)
    if ((frames_in_ - frames_out_) % 90 == 0) {
      // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
      // in default calls.
      RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_
                       << " / out " << frames_out_ << " / in " << frames_in_
                       << " Changes: " << adaption_changes_
                       << " Input: " << in_width << "x" << in_height
                       << " timestamp: " << in_timestamp_ns
                       << " Output fps: " << max_framerate_request_ << "/"
                       << output_format_request_.max_fps.value_or(-1)
                       << " alignment: " << resolution_alignment_;
    }

    // Drop frame.
    return false;
  }

  // Calculate how the input should be cropped. Without a valid (positive)
  // target aspect ratio the input is used uncropped.
  if (!target_aspect_ratio || target_aspect_ratio->first <= 0 ||
      target_aspect_ratio->second <= 0) {
    *cropped_width = in_width;
    *cropped_height = in_height;
  } else {
    const float requested_aspect =
        target_aspect_ratio->first /
        static_cast<float>(target_aspect_ratio->second);
    // Shrink whichever dimension is too large for the requested aspect ratio;
    // neither dimension ever grows beyond the input.
    *cropped_width =
        std::min(in_width, static_cast<int>(in_height * requested_aspect));
    *cropped_height =
        std::min(in_height, static_cast<int>(in_width / requested_aspect));
  }
  const Fraction scale =
      FindScale(*cropped_width, *cropped_height, target_pixel_count,
                max_pixel_count, variable_start_scale_factor_);
  // Adjust cropping slightly to get correctly aligned output size and a perfect
  // scale factor.
  *cropped_width = roundUp(*cropped_width,
                           scale.denominator * resolution_alignment_, in_width);
  *cropped_height = roundUp(
      *cropped_height, scale.denominator * resolution_alignment_, in_height);
  RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
  RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);

  // Calculate final output size. Dividing first is exact because the cropped
  // size is a multiple of the denominator.
  *out_width = *cropped_width / scale.denominator * scale.numerator;
  *out_height = *cropped_height / scale.denominator * scale.numerator;
  RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
  RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);

  ++frames_out_;
  if (scale.numerator != scale.denominator)
    ++frames_scaled_;

  // Log and count a change of output size, except for the very first frame
  // (previous_width_ is still 0 then).
  if (previous_width_ &&
      (previous_width_ != *out_width || previous_height_ != *out_height)) {
    ++adaption_changes_;
    RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_
                     << " / out " << frames_out_ << " / in " << frames_in_
                     << " Changes: " << adaption_changes_
                     << " Input: " << in_width << "x" << in_height
                     << " Scale: " << scale.numerator << "/"
                     << scale.denominator << " Output: " << *out_width << "x"
                     << *out_height << " fps: " << max_framerate_request_ << "/"
                     << output_format_request_.max_fps.value_or(-1)
                     << " alignment: " << resolution_alignment_;
  }

  previous_width_ = *out_width;
  previous_height_ = *out_height;

  return true;
}
273
OnOutputFormatRequest(const absl::optional<VideoFormat> & format)274 void VideoAdapter::OnOutputFormatRequest(
275 const absl::optional<VideoFormat>& format) {
276 absl::optional<std::pair<int, int>> target_aspect_ratio;
277 absl::optional<int> max_pixel_count;
278 absl::optional<int> max_fps;
279 if (format) {
280 target_aspect_ratio = std::make_pair(format->width, format->height);
281 max_pixel_count = format->width * format->height;
282 if (format->interval > 0)
283 max_fps = rtc::kNumNanosecsPerSec / format->interval;
284 }
285 OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps);
286 }
287
OnOutputFormatRequest(const absl::optional<std::pair<int,int>> & target_aspect_ratio,const absl::optional<int> & max_pixel_count,const absl::optional<int> & max_fps)288 void VideoAdapter::OnOutputFormatRequest(
289 const absl::optional<std::pair<int, int>>& target_aspect_ratio,
290 const absl::optional<int>& max_pixel_count,
291 const absl::optional<int>& max_fps) {
292 absl::optional<std::pair<int, int>> target_landscape_aspect_ratio;
293 absl::optional<std::pair<int, int>> target_portrait_aspect_ratio;
294 if (target_aspect_ratio && target_aspect_ratio->first > 0 &&
295 target_aspect_ratio->second > 0) {
296 // Maintain input orientation.
297 const int max_side =
298 std::max(target_aspect_ratio->first, target_aspect_ratio->second);
299 const int min_side =
300 std::min(target_aspect_ratio->first, target_aspect_ratio->second);
301 target_landscape_aspect_ratio = std::make_pair(max_side, min_side);
302 target_portrait_aspect_ratio = std::make_pair(min_side, max_side);
303 }
304 OnOutputFormatRequest(target_landscape_aspect_ratio, max_pixel_count,
305 target_portrait_aspect_ratio, max_pixel_count, max_fps);
306 }
307
OnOutputFormatRequest(const absl::optional<std::pair<int,int>> & target_landscape_aspect_ratio,const absl::optional<int> & max_landscape_pixel_count,const absl::optional<std::pair<int,int>> & target_portrait_aspect_ratio,const absl::optional<int> & max_portrait_pixel_count,const absl::optional<int> & max_fps)308 void VideoAdapter::OnOutputFormatRequest(
309 const absl::optional<std::pair<int, int>>& target_landscape_aspect_ratio,
310 const absl::optional<int>& max_landscape_pixel_count,
311 const absl::optional<std::pair<int, int>>& target_portrait_aspect_ratio,
312 const absl::optional<int>& max_portrait_pixel_count,
313 const absl::optional<int>& max_fps) {
314 webrtc::MutexLock lock(&mutex_);
315
316 OutputFormatRequest request = {
317 .target_landscape_aspect_ratio = target_landscape_aspect_ratio,
318 .max_landscape_pixel_count = max_landscape_pixel_count,
319 .target_portrait_aspect_ratio = target_portrait_aspect_ratio,
320 .max_portrait_pixel_count = max_portrait_pixel_count,
321 .max_fps = max_fps};
322
323 if (stashed_output_format_request_) {
324 // Save the output format request for later use in case the encoder making
325 // this call would become active, because currently all active encoders use
326 // requested_resolution instead.
327 stashed_output_format_request_ = request;
328 RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
329 << stashed_output_format_request_->ToString();
330 } else {
331 output_format_request_ = request;
332 RTC_LOG(LS_INFO) << "Setting output_format_request_: "
333 << output_format_request_.ToString();
334 }
335
336 framerate_controller_.Reset();
337 }
338
// Applies the restrictions in `sink_wants` under `mutex_`.
//
// The pixel-count, frame-rate and alignment requests are always updated.
// After that, and only if aggregates are present and the override field trial
// is not disabled, the function manages the output format request:
//  - no requested_resolution: a previously stashed request (if any) is
//    restored as the active request;
//  - requested_resolution set, but some active encoder does not use it: no
//    further change;
//  - otherwise: the active request is stashed (unless one is stashed already)
//    and overwritten with values derived from requested_resolution.
void VideoAdapter::OnSinkWants(const rtc::VideoSinkWants& sink_wants) {
  webrtc::MutexLock lock(&mutex_);
  resolution_request_max_pixel_count_ = sink_wants.max_pixel_count;
  // Without an explicit target, aim for the maximum.
  resolution_request_target_pixel_count_ =
      sink_wants.target_pixel_count.value_or(
          resolution_request_max_pixel_count_);
  max_framerate_request_ = sink_wants.max_framerate_fps;
  // Combine the source's and the sink's alignment requirements.
  resolution_alignment_ = cricket::LeastCommonMultiple(
      source_resolution_alignment_, sink_wants.resolution_alignment);

  if (!sink_wants.aggregates) {
    RTC_LOG(LS_WARNING)
        << "These should always be created by VideoBroadcaster!";
    return;
  }

  // If requested_resolution is used, and there are no active encoders
  // that are NOT using requested_resolution (aka newapi), then override
  // calls to OnOutputFormatRequest and use values from requested_resolution
  // instead (combined with qualityscaling based on pixel counts above).
  if (webrtc::field_trial::IsDisabled(
          "WebRTC-Video-RequestedResolutionOverrideOutputFormatRequest")) {
    // kill-switch...
    return;
  }

  if (!sink_wants.requested_resolution) {
    if (stashed_output_format_request_) {
      // because current active_output_format_request is based on
      // requested_resolution logic, while current encoder(s) doesn't want that,
      // we have to restore the stashed request.
      RTC_LOG(LS_INFO) << "Unstashing OnOutputFormatRequest: "
                       << stashed_output_format_request_->ToString();
      output_format_request_ = *stashed_output_format_request_;
      stashed_output_format_request_.reset();
    }
    return;
  }

  // Some active encoder still relies on OnOutputFormatRequest; leave the
  // active request untouched.
  if (sink_wants.aggregates->any_active_without_requested_resolution) {
    return;
  }

  if (!stashed_output_format_request_) {
    // The active output format request is about to be rewritten by
    // request_resolution. We need to save it for later use in case the encoder
    // which doesn't use request_resolution logic become active in the future.
    stashed_output_format_request_ = output_format_request_;
    RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
                     << stashed_output_format_request_->ToString();
  }

  // Rewrite the active request from requested_resolution: same pixel limit for
  // both orientations, with the aspect ratio transposed for portrait.
  auto res = *sink_wants.requested_resolution;
  auto pixel_count = res.width * res.height;
  output_format_request_.target_landscape_aspect_ratio =
      std::make_pair(res.width, res.height);
  output_format_request_.max_landscape_pixel_count = pixel_count;
  output_format_request_.target_portrait_aspect_ratio =
      std::make_pair(res.height, res.width);
  output_format_request_.max_portrait_pixel_count = pixel_count;
  output_format_request_.max_fps = max_framerate_request_;
  RTC_LOG(LS_INFO) << "Setting output_format_request_ based on sink_wants: "
                   << output_format_request_.ToString();
}
403
GetTargetPixels() const404 int VideoAdapter::GetTargetPixels() const {
405 webrtc::MutexLock lock(&mutex_);
406 return resolution_request_target_pixel_count_;
407 }
408
GetMaxFramerate() const409 float VideoAdapter::GetMaxFramerate() const {
410 webrtc::MutexLock lock(&mutex_);
411 // Minimum of `output_format_request_.max_fps` and `max_framerate_request_` is
412 // used to throttle frame-rate.
413 int framerate =
414 std::min(max_framerate_request_,
415 output_format_request_.max_fps.value_or(max_framerate_request_));
416 if (framerate == std::numeric_limits<int>::max()) {
417 return std::numeric_limits<float>::infinity();
418 } else {
419 return max_framerate_request_;
420 }
421 }
422
ToString() const423 std::string VideoAdapter::OutputFormatRequest::ToString() const {
424 rtc::StringBuilder oss;
425 oss << "[ ";
426 if (target_landscape_aspect_ratio == Swap(target_portrait_aspect_ratio) &&
427 max_landscape_pixel_count == max_portrait_pixel_count) {
428 if (target_landscape_aspect_ratio) {
429 oss << target_landscape_aspect_ratio->first << "x"
430 << target_landscape_aspect_ratio->second;
431 } else {
432 oss << "unset-resolution";
433 }
434 if (max_landscape_pixel_count) {
435 oss << " max_pixel_count: " << *max_landscape_pixel_count;
436 }
437 } else {
438 oss << "[ landscape: ";
439 if (target_landscape_aspect_ratio) {
440 oss << target_landscape_aspect_ratio->first << "x"
441 << target_landscape_aspect_ratio->second;
442 } else {
443 oss << "unset";
444 }
445 if (max_landscape_pixel_count) {
446 oss << " max_pixel_count: " << *max_landscape_pixel_count;
447 }
448 oss << " ] [ portrait: ";
449 if (target_portrait_aspect_ratio) {
450 oss << target_portrait_aspect_ratio->first << "x"
451 << target_portrait_aspect_ratio->second;
452 }
453 if (max_portrait_pixel_count) {
454 oss << " max_pixel_count: " << *max_portrait_pixel_count;
455 }
456 oss << " ]";
457 }
458 oss << " max_fps: ";
459 if (max_fps) {
460 oss << *max_fps;
461 } else {
462 oss << "unset";
463 }
464 oss << " ]";
465 return oss.Release();
466 }
467
468 } // namespace cricket
469