xref: /aosp_15_r20/external/webrtc/media/base/video_adapter.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2010 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "media/base/video_adapter.h"
12 
13 #include <algorithm>
14 #include <cmath>
15 #include <cstdlib>
16 #include <limits>
17 #include <utility>
18 
19 #include "absl/types/optional.h"
20 #include "media/base/video_common.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23 #include "rtc_base/strings/string_builder.h"
24 #include "rtc_base/time_utils.h"
25 #include "system_wrappers/include/field_trial.h"
26 
27 namespace {
28 
29 struct Fraction {
30   int numerator;
31   int denominator;
32 
DivideByGcd__anon4cc909e10111::Fraction33   void DivideByGcd() {
34     int g = cricket::GreatestCommonDivisor(numerator, denominator);
35     numerator /= g;
36     denominator /= g;
37   }
38 
39   // Determines number of output pixels if both width and height of an input of
40   // `input_pixels` pixels is scaled with the fraction numerator / denominator.
scale_pixel_count__anon4cc909e10111::Fraction41   int scale_pixel_count(int input_pixels) {
42     return (numerator * numerator * input_pixels) / (denominator * denominator);
43   }
44 };
45 
// Rounds `value_to_round` to a multiple of `multiple`. Rounding upwards is
// preferred; when the rounded-up value would exceed `max_value`, `max_value`
// rounded down to a multiple of `multiple` is returned instead.
int roundUp(int value_to_round, int multiple, int max_value) {
  const int multiples_needed = (value_to_round + multiple - 1) / multiple;
  const int rounded_up = multiples_needed * multiple;
  if (rounded_up <= max_value) {
    return rounded_up;
  }
  // Rounding up overshoots the limit: fall back to rounding the limit down.
  return (max_value / multiple) * multiple;
}
54 
55 // Generates a scale factor that makes `input_pixels` close to `target_pixels`,
56 // but no higher than `max_pixels`.
FindScale(int input_width,int input_height,int target_pixels,int max_pixels,bool variable_start_scale_factor)57 Fraction FindScale(int input_width,
58                    int input_height,
59                    int target_pixels,
60                    int max_pixels,
61                    bool variable_start_scale_factor) {
62   // This function only makes sense for a positive target.
63   RTC_DCHECK_GT(target_pixels, 0);
64   RTC_DCHECK_GT(max_pixels, 0);
65   RTC_DCHECK_GE(max_pixels, target_pixels);
66 
67   const int input_pixels = input_width * input_height;
68 
69   // Don't scale up original.
70   if (target_pixels >= input_pixels)
71     return Fraction{1, 1};
72 
73   Fraction current_scale = Fraction{1, 1};
74   Fraction best_scale = Fraction{1, 1};
75 
76   if (variable_start_scale_factor) {
77     // Start scaling down by 2/3 depending on `input_width` and `input_height`.
78     if (input_width % 3 == 0 && input_height % 3 == 0) {
79       // 2/3 (then alternates 3/4, 2/3, 3/4,...).
80       current_scale = Fraction{6, 6};
81     }
82     if (input_width % 9 == 0 && input_height % 9 == 0) {
83       // 2/3, 2/3 (then alternates 3/4, 2/3, 3/4,...).
84       current_scale = Fraction{36, 36};
85     }
86   }
87 
88   // The minimum (absolute) difference between the number of output pixels and
89   // the target pixel count.
90   int min_pixel_diff = std::numeric_limits<int>::max();
91   if (input_pixels <= max_pixels) {
92     // Start condition for 1/1 case, if it is less than max.
93     min_pixel_diff = std::abs(input_pixels - target_pixels);
94   }
95 
96   // Alternately scale down by 3/4 and 2/3. This results in fractions which are
97   // effectively scalable. For instance, starting at 1280x720 will result in
98   // the series (3/4) => 960x540, (1/2) => 640x360, (3/8) => 480x270,
99   // (1/4) => 320x180, (3/16) => 240x125, (1/8) => 160x90.
100   while (current_scale.scale_pixel_count(input_pixels) > target_pixels) {
101     if (current_scale.numerator % 3 == 0 &&
102         current_scale.denominator % 2 == 0) {
103       // Multiply by 2/3.
104       current_scale.numerator /= 3;
105       current_scale.denominator /= 2;
106     } else {
107       // Multiply by 3/4.
108       current_scale.numerator *= 3;
109       current_scale.denominator *= 4;
110     }
111 
112     int output_pixels = current_scale.scale_pixel_count(input_pixels);
113     if (output_pixels <= max_pixels) {
114       int diff = std::abs(target_pixels - output_pixels);
115       if (diff < min_pixel_diff) {
116         min_pixel_diff = diff;
117         best_scale = current_scale;
118       }
119     }
120   }
121   best_scale.DivideByGcd();
122 
123   return best_scale;
124 }
125 
// Returns `in` with the two elements of the pair exchanged, or absl::nullopt
// when `in` holds no value.
absl::optional<std::pair<int, int>> Swap(
    const absl::optional<std::pair<int, int>>& in) {
  if (in.has_value()) {
    return std::make_pair(in->second, in->first);
  }
  return absl::nullopt;
}
133 
134 }  // namespace
135 
136 namespace cricket {
137 
VideoAdapter(int source_resolution_alignment)138 VideoAdapter::VideoAdapter(int source_resolution_alignment)
139     : frames_in_(0),
140       frames_out_(0),
141       frames_scaled_(0),
142       adaption_changes_(0),
143       previous_width_(0),
144       previous_height_(0),
145       variable_start_scale_factor_(!webrtc::field_trial::IsDisabled(
146           "WebRTC-Video-VariableStartScaleFactor")),
147       source_resolution_alignment_(source_resolution_alignment),
148       resolution_alignment_(source_resolution_alignment),
149       resolution_request_target_pixel_count_(std::numeric_limits<int>::max()),
150       resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
151       max_framerate_request_(std::numeric_limits<int>::max()) {}
152 
VideoAdapter()153 VideoAdapter::VideoAdapter() : VideoAdapter(1) {}
154 
~VideoAdapter()155 VideoAdapter::~VideoAdapter() {}
156 
DropFrame(int64_t in_timestamp_ns)157 bool VideoAdapter::DropFrame(int64_t in_timestamp_ns) {
158   int max_fps = max_framerate_request_;
159   if (output_format_request_.max_fps)
160     max_fps = std::min(max_fps, *output_format_request_.max_fps);
161 
162   framerate_controller_.SetMaxFramerate(max_fps);
163   return framerate_controller_.ShouldDropFrame(in_timestamp_ns);
164 }
165 
AdaptFrameResolution(int in_width,int in_height,int64_t in_timestamp_ns,int * cropped_width,int * cropped_height,int * out_width,int * out_height)166 bool VideoAdapter::AdaptFrameResolution(int in_width,
167                                         int in_height,
168                                         int64_t in_timestamp_ns,
169                                         int* cropped_width,
170                                         int* cropped_height,
171                                         int* out_width,
172                                         int* out_height) {
173   webrtc::MutexLock lock(&mutex_);
174   ++frames_in_;
175 
176   // The max output pixel count is the minimum of the requests from
177   // OnOutputFormatRequest and OnResolutionFramerateRequest.
178   int max_pixel_count = resolution_request_max_pixel_count_;
179 
180   // Select target aspect ratio and max pixel count depending on input frame
181   // orientation.
182   absl::optional<std::pair<int, int>> target_aspect_ratio;
183   if (in_width > in_height) {
184     target_aspect_ratio = output_format_request_.target_landscape_aspect_ratio;
185     if (output_format_request_.max_landscape_pixel_count)
186       max_pixel_count = std::min(
187           max_pixel_count, *output_format_request_.max_landscape_pixel_count);
188   } else {
189     target_aspect_ratio = output_format_request_.target_portrait_aspect_ratio;
190     if (output_format_request_.max_portrait_pixel_count)
191       max_pixel_count = std::min(
192           max_pixel_count, *output_format_request_.max_portrait_pixel_count);
193   }
194 
195   int target_pixel_count =
196       std::min(resolution_request_target_pixel_count_, max_pixel_count);
197 
198   // Drop the input frame if necessary.
199   if (max_pixel_count <= 0 || DropFrame(in_timestamp_ns)) {
200     // Show VAdapt log every 90 frames dropped. (3 seconds)
201     if ((frames_in_ - frames_out_) % 90 == 0) {
202       // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
203       // in default calls.
204       RTC_LOG(LS_INFO) << "VAdapt Drop Frame: scaled " << frames_scaled_
205                        << " / out " << frames_out_ << " / in " << frames_in_
206                        << " Changes: " << adaption_changes_
207                        << " Input: " << in_width << "x" << in_height
208                        << " timestamp: " << in_timestamp_ns
209                        << " Output fps: " << max_framerate_request_ << "/"
210                        << output_format_request_.max_fps.value_or(-1)
211                        << " alignment: " << resolution_alignment_;
212     }
213 
214     // Drop frame.
215     return false;
216   }
217 
218   // Calculate how the input should be cropped.
219   if (!target_aspect_ratio || target_aspect_ratio->first <= 0 ||
220       target_aspect_ratio->second <= 0) {
221     *cropped_width = in_width;
222     *cropped_height = in_height;
223   } else {
224     const float requested_aspect =
225         target_aspect_ratio->first /
226         static_cast<float>(target_aspect_ratio->second);
227     *cropped_width =
228         std::min(in_width, static_cast<int>(in_height * requested_aspect));
229     *cropped_height =
230         std::min(in_height, static_cast<int>(in_width / requested_aspect));
231   }
232   const Fraction scale =
233       FindScale(*cropped_width, *cropped_height, target_pixel_count,
234                 max_pixel_count, variable_start_scale_factor_);
235   // Adjust cropping slightly to get correctly aligned output size and a perfect
236   // scale factor.
237   *cropped_width = roundUp(*cropped_width,
238                            scale.denominator * resolution_alignment_, in_width);
239   *cropped_height = roundUp(
240       *cropped_height, scale.denominator * resolution_alignment_, in_height);
241   RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
242   RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);
243 
244   // Calculate final output size.
245   *out_width = *cropped_width / scale.denominator * scale.numerator;
246   *out_height = *cropped_height / scale.denominator * scale.numerator;
247   RTC_DCHECK_EQ(0, *out_width % resolution_alignment_);
248   RTC_DCHECK_EQ(0, *out_height % resolution_alignment_);
249 
250   ++frames_out_;
251   if (scale.numerator != scale.denominator)
252     ++frames_scaled_;
253 
254   if (previous_width_ &&
255       (previous_width_ != *out_width || previous_height_ != *out_height)) {
256     ++adaption_changes_;
257     RTC_LOG(LS_INFO) << "Frame size changed: scaled " << frames_scaled_
258                      << " / out " << frames_out_ << " / in " << frames_in_
259                      << " Changes: " << adaption_changes_
260                      << " Input: " << in_width << "x" << in_height
261                      << " Scale: " << scale.numerator << "/"
262                      << scale.denominator << " Output: " << *out_width << "x"
263                      << *out_height << " fps: " << max_framerate_request_ << "/"
264                      << output_format_request_.max_fps.value_or(-1)
265                      << " alignment: " << resolution_alignment_;
266   }
267 
268   previous_width_ = *out_width;
269   previous_height_ = *out_height;
270 
271   return true;
272 }
273 
OnOutputFormatRequest(const absl::optional<VideoFormat> & format)274 void VideoAdapter::OnOutputFormatRequest(
275     const absl::optional<VideoFormat>& format) {
276   absl::optional<std::pair<int, int>> target_aspect_ratio;
277   absl::optional<int> max_pixel_count;
278   absl::optional<int> max_fps;
279   if (format) {
280     target_aspect_ratio = std::make_pair(format->width, format->height);
281     max_pixel_count = format->width * format->height;
282     if (format->interval > 0)
283       max_fps = rtc::kNumNanosecsPerSec / format->interval;
284   }
285   OnOutputFormatRequest(target_aspect_ratio, max_pixel_count, max_fps);
286 }
287 
OnOutputFormatRequest(const absl::optional<std::pair<int,int>> & target_aspect_ratio,const absl::optional<int> & max_pixel_count,const absl::optional<int> & max_fps)288 void VideoAdapter::OnOutputFormatRequest(
289     const absl::optional<std::pair<int, int>>& target_aspect_ratio,
290     const absl::optional<int>& max_pixel_count,
291     const absl::optional<int>& max_fps) {
292   absl::optional<std::pair<int, int>> target_landscape_aspect_ratio;
293   absl::optional<std::pair<int, int>> target_portrait_aspect_ratio;
294   if (target_aspect_ratio && target_aspect_ratio->first > 0 &&
295       target_aspect_ratio->second > 0) {
296     // Maintain input orientation.
297     const int max_side =
298         std::max(target_aspect_ratio->first, target_aspect_ratio->second);
299     const int min_side =
300         std::min(target_aspect_ratio->first, target_aspect_ratio->second);
301     target_landscape_aspect_ratio = std::make_pair(max_side, min_side);
302     target_portrait_aspect_ratio = std::make_pair(min_side, max_side);
303   }
304   OnOutputFormatRequest(target_landscape_aspect_ratio, max_pixel_count,
305                         target_portrait_aspect_ratio, max_pixel_count, max_fps);
306 }
307 
OnOutputFormatRequest(const absl::optional<std::pair<int,int>> & target_landscape_aspect_ratio,const absl::optional<int> & max_landscape_pixel_count,const absl::optional<std::pair<int,int>> & target_portrait_aspect_ratio,const absl::optional<int> & max_portrait_pixel_count,const absl::optional<int> & max_fps)308 void VideoAdapter::OnOutputFormatRequest(
309     const absl::optional<std::pair<int, int>>& target_landscape_aspect_ratio,
310     const absl::optional<int>& max_landscape_pixel_count,
311     const absl::optional<std::pair<int, int>>& target_portrait_aspect_ratio,
312     const absl::optional<int>& max_portrait_pixel_count,
313     const absl::optional<int>& max_fps) {
314   webrtc::MutexLock lock(&mutex_);
315 
316   OutputFormatRequest request = {
317       .target_landscape_aspect_ratio = target_landscape_aspect_ratio,
318       .max_landscape_pixel_count = max_landscape_pixel_count,
319       .target_portrait_aspect_ratio = target_portrait_aspect_ratio,
320       .max_portrait_pixel_count = max_portrait_pixel_count,
321       .max_fps = max_fps};
322 
323   if (stashed_output_format_request_) {
324     // Save the output format request for later use in case the encoder making
325     // this call would become active, because currently all active encoders use
326     // requested_resolution instead.
327     stashed_output_format_request_ = request;
328     RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
329                      << stashed_output_format_request_->ToString();
330   } else {
331     output_format_request_ = request;
332     RTC_LOG(LS_INFO) << "Setting output_format_request_: "
333                      << output_format_request_.ToString();
334   }
335 
336   framerate_controller_.Reset();
337 }
338 
OnSinkWants(const rtc::VideoSinkWants & sink_wants)339 void VideoAdapter::OnSinkWants(const rtc::VideoSinkWants& sink_wants) {
340   webrtc::MutexLock lock(&mutex_);
341   resolution_request_max_pixel_count_ = sink_wants.max_pixel_count;
342   resolution_request_target_pixel_count_ =
343       sink_wants.target_pixel_count.value_or(
344           resolution_request_max_pixel_count_);
345   max_framerate_request_ = sink_wants.max_framerate_fps;
346   resolution_alignment_ = cricket::LeastCommonMultiple(
347       source_resolution_alignment_, sink_wants.resolution_alignment);
348 
349   if (!sink_wants.aggregates) {
350     RTC_LOG(LS_WARNING)
351         << "These should always be created by VideoBroadcaster!";
352     return;
353   }
354 
355   // If requested_resolution is used, and there are no active encoders
356   // that are NOT using requested_resolution (aka newapi), then override
357   // calls to OnOutputFormatRequest and use values from requested_resolution
358   // instead (combined with qualityscaling based on pixel counts above).
359   if (webrtc::field_trial::IsDisabled(
360           "WebRTC-Video-RequestedResolutionOverrideOutputFormatRequest")) {
361     // kill-switch...
362     return;
363   }
364 
365   if (!sink_wants.requested_resolution) {
366     if (stashed_output_format_request_) {
367       // because current active_output_format_request is based on
368       // requested_resolution logic, while current encoder(s) doesn't want that,
369       // we have to restore the stashed request.
370       RTC_LOG(LS_INFO) << "Unstashing OnOutputFormatRequest: "
371                        << stashed_output_format_request_->ToString();
372       output_format_request_ = *stashed_output_format_request_;
373       stashed_output_format_request_.reset();
374     }
375     return;
376   }
377 
378   if (sink_wants.aggregates->any_active_without_requested_resolution) {
379     return;
380   }
381 
382   if (!stashed_output_format_request_) {
383     // The active output format request is about to be rewritten by
384     // request_resolution. We need to save it for later use in case the encoder
385     // which doesn't use request_resolution logic become active in the future.
386     stashed_output_format_request_ = output_format_request_;
387     RTC_LOG(LS_INFO) << "Stashing OnOutputFormatRequest: "
388                      << stashed_output_format_request_->ToString();
389   }
390 
391   auto res = *sink_wants.requested_resolution;
392   auto pixel_count = res.width * res.height;
393   output_format_request_.target_landscape_aspect_ratio =
394       std::make_pair(res.width, res.height);
395   output_format_request_.max_landscape_pixel_count = pixel_count;
396   output_format_request_.target_portrait_aspect_ratio =
397       std::make_pair(res.height, res.width);
398   output_format_request_.max_portrait_pixel_count = pixel_count;
399   output_format_request_.max_fps = max_framerate_request_;
400   RTC_LOG(LS_INFO) << "Setting output_format_request_ based on sink_wants: "
401                    << output_format_request_.ToString();
402 }
403 
GetTargetPixels() const404 int VideoAdapter::GetTargetPixels() const {
405   webrtc::MutexLock lock(&mutex_);
406   return resolution_request_target_pixel_count_;
407 }
408 
GetMaxFramerate() const409 float VideoAdapter::GetMaxFramerate() const {
410   webrtc::MutexLock lock(&mutex_);
411   // Minimum of `output_format_request_.max_fps` and `max_framerate_request_` is
412   // used to throttle frame-rate.
413   int framerate =
414       std::min(max_framerate_request_,
415                output_format_request_.max_fps.value_or(max_framerate_request_));
416   if (framerate == std::numeric_limits<int>::max()) {
417     return std::numeric_limits<float>::infinity();
418   } else {
419     return max_framerate_request_;
420   }
421 }
422 
ToString() const423 std::string VideoAdapter::OutputFormatRequest::ToString() const {
424   rtc::StringBuilder oss;
425   oss << "[ ";
426   if (target_landscape_aspect_ratio == Swap(target_portrait_aspect_ratio) &&
427       max_landscape_pixel_count == max_portrait_pixel_count) {
428     if (target_landscape_aspect_ratio) {
429       oss << target_landscape_aspect_ratio->first << "x"
430           << target_landscape_aspect_ratio->second;
431     } else {
432       oss << "unset-resolution";
433     }
434     if (max_landscape_pixel_count) {
435       oss << " max_pixel_count: " << *max_landscape_pixel_count;
436     }
437   } else {
438     oss << "[ landscape: ";
439     if (target_landscape_aspect_ratio) {
440       oss << target_landscape_aspect_ratio->first << "x"
441           << target_landscape_aspect_ratio->second;
442     } else {
443       oss << "unset";
444     }
445     if (max_landscape_pixel_count) {
446       oss << " max_pixel_count: " << *max_landscape_pixel_count;
447     }
448     oss << " ] [ portrait: ";
449     if (target_portrait_aspect_ratio) {
450       oss << target_portrait_aspect_ratio->first << "x"
451           << target_portrait_aspect_ratio->second;
452     }
453     if (max_portrait_pixel_count) {
454       oss << " max_pixel_count: " << *max_portrait_pixel_count;
455     }
456     oss << " ]";
457   }
458   oss << " max_fps: ";
459   if (max_fps) {
460     oss << *max_fps;
461   } else {
462     oss << "unset";
463   }
464   oss << " ]";
465   return oss.Release();
466 }
467 
468 }  // namespace cricket
469