xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
12 
13 #include <algorithm>
14 
15 #include "common_audio/include/audio_util.h"
16 #include "modules/audio_processing/agc2/agc2_common.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/logging.h"
20 #include "rtc_base/numerics/safe_minmax.h"
21 #include "system_wrappers/include/metrics.h"
22 
23 namespace webrtc {
24 namespace {
25 
26 using AdaptiveDigitalConfig =
27     AudioProcessing::Config::GainController2::AdaptiveDigital;
28 
29 constexpr int kHeadroomHistogramMin = 0;
30 constexpr int kHeadroomHistogramMax = 50;
31 constexpr int kGainDbHistogramMax = 30;
32 
33 // Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
34 // Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
35 // safety margin to allow transient peaks to exceed the target peak level
36 // without clipping.
ComputeGainDb(float input_level_dbfs,const AdaptiveDigitalConfig & config)37 float ComputeGainDb(float input_level_dbfs,
38                     const AdaptiveDigitalConfig& config) {
39   // If the level is very low, apply the maximum gain.
40   if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
41     return config.max_gain_db;
42   }
43   // We expect to end up here most of the time: the level is below
44   // -headroom, but we can boost it to -headroom.
45   if (input_level_dbfs < -config.headroom_db) {
46     return -config.headroom_db - input_level_dbfs;
47   }
48   // The level is too high and we can't boost.
49   RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
50   return 0.0f;
51 }
52 
53 // Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
54 // does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
55 // `target_gain_db` so that the output noise level equals
56 // `max_output_noise_level_dbfs`.
LimitGainByNoise(float target_gain_db,float input_noise_level_dbfs,float max_output_noise_level_dbfs,ApmDataDumper & apm_data_dumper)57 float LimitGainByNoise(float target_gain_db,
58                        float input_noise_level_dbfs,
59                        float max_output_noise_level_dbfs,
60                        ApmDataDumper& apm_data_dumper) {
61   const float max_allowed_gain_db =
62       max_output_noise_level_dbfs - input_noise_level_dbfs;
63   apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
64                           max_allowed_gain_db);
65   return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
66 }
67 
LimitGainByLowConfidence(float target_gain_db,float last_gain_db,float limiter_audio_level_dbfs,bool estimate_is_confident)68 float LimitGainByLowConfidence(float target_gain_db,
69                                float last_gain_db,
70                                float limiter_audio_level_dbfs,
71                                bool estimate_is_confident) {
72   if (estimate_is_confident ||
73       limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
74     return target_gain_db;
75   }
76   const float limiter_level_dbfs_before_gain =
77       limiter_audio_level_dbfs - last_gain_db;
78 
79   // Compute a new gain so that `limiter_level_dbfs_before_gain` +
80   // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`.
81   const float new_target_gain_db = std::max(
82       kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
83   return std::min(new_target_gain_db, target_gain_db);
84 }
85 
86 // Computes how the gain should change during this frame.
87 // Return the gain difference in db to 'last_gain_db'.
ComputeGainChangeThisFrameDb(float target_gain_db,float last_gain_db,bool gain_increase_allowed,float max_gain_decrease_db,float max_gain_increase_db)88 float ComputeGainChangeThisFrameDb(float target_gain_db,
89                                    float last_gain_db,
90                                    bool gain_increase_allowed,
91                                    float max_gain_decrease_db,
92                                    float max_gain_increase_db) {
93   RTC_DCHECK_GT(max_gain_decrease_db, 0);
94   RTC_DCHECK_GT(max_gain_increase_db, 0);
95   float target_gain_difference_db = target_gain_db - last_gain_db;
96   if (!gain_increase_allowed) {
97     target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
98   }
99   return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
100                         max_gain_increase_db);
101 }
102 
103 // Copies the (multichannel) audio samples from `src` into `dst`.
CopyAudio(AudioFrameView<const float> src,std::vector<std::vector<float>> & dst)104 void CopyAudio(AudioFrameView<const float> src,
105                std::vector<std::vector<float>>& dst) {
106   RTC_DCHECK_GT(src.num_channels(), 0);
107   RTC_DCHECK_GT(src.samples_per_channel(), 0);
108   RTC_DCHECK_EQ(dst.size(), src.num_channels());
109   for (int c = 0; c < src.num_channels(); ++c) {
110     rtc::ArrayView<const float> channel_view = src.channel(c);
111     RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
112     RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
113     std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
114   }
115 }
116 
117 }  // namespace
118 
AdaptiveDigitalGainApplier(ApmDataDumper * apm_data_dumper,const AudioProcessing::Config::GainController2::AdaptiveDigital & config,int sample_rate_hz,int num_channels)119 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
120     ApmDataDumper* apm_data_dumper,
121     const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
122     int sample_rate_hz,
123     int num_channels)
124     : apm_data_dumper_(apm_data_dumper),
125       gain_applier_(
126           /*hard_clip_samples=*/false,
127           /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
128       config_(config),
129       max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
130                                    kFrameDurationMs / 1000.0f),
131       calls_since_last_gain_log_(0),
132       frames_to_gain_increase_allowed_(
133           config_.adjacent_speech_frames_threshold),
134       last_gain_db_(config_.initial_gain_db) {
135   RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
136   RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
137   RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
138   RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
139   Initialize(sample_rate_hz, num_channels);
140 }
141 
Initialize(int sample_rate_hz,int num_channels)142 void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
143                                             int num_channels) {
144   if (!config_.dry_run) {
145     return;
146   }
147   RTC_DCHECK_GT(sample_rate_hz, 0);
148   RTC_DCHECK_GT(num_channels, 0);
149   int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
150   bool sample_rate_changed =
151       dry_run_frame_.empty() ||  // Handle initialization.
152       dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
153   bool num_channels_changed =
154       dry_run_channels_.size() != static_cast<size_t>(num_channels);
155   if (sample_rate_changed || num_channels_changed) {
156     // Resize the multichannel audio vector and update the channel pointers.
157     dry_run_frame_.resize(num_channels);
158     dry_run_channels_.resize(num_channels);
159     for (int c = 0; c < num_channels; ++c) {
160       dry_run_frame_[c].resize(frame_size);
161       dry_run_channels_[c] = dry_run_frame_[c].data();
162     }
163   }
164 }
165 
Process(const FrameInfo & info,AudioFrameView<float> frame)166 void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
167                                          AudioFrameView<float> frame) {
168   RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
169   RTC_DCHECK_GE(frame.num_channels(), 1);
170   RTC_DCHECK(
171       frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
172       frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
173       << "`frame` does not look like a 10 ms frame for an APM supported sample "
174          "rate";
175 
176   // Compute the input level used to select the desired gain.
177   RTC_DCHECK_GT(info.headroom_db, 0.0f);
178   const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
179 
180   const float target_gain_db = LimitGainByLowConfidence(
181       LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
182                        info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
183                        *apm_data_dumper_),
184       last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
185 
186   // Forbid increasing the gain until enough adjacent speech frames are
187   // observed.
188   bool first_confident_speech_frame = false;
189   if (info.speech_probability < kVadConfidenceThreshold) {
190     frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
191   } else if (frames_to_gain_increase_allowed_ > 0) {
192     frames_to_gain_increase_allowed_--;
193     first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
194   }
195   apm_data_dumper_->DumpRaw(
196       "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
197       frames_to_gain_increase_allowed_);
198 
199   const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
200 
201   float max_gain_increase_db = max_gain_change_db_per_10ms_;
202   if (first_confident_speech_frame) {
203     // No gain increase happened while waiting for a long enough speech
204     // sequence. Therefore, temporarily allow a faster gain increase.
205     RTC_DCHECK(gain_increase_allowed);
206     max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
207   }
208 
209   const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
210       target_gain_db, last_gain_db_, gain_increase_allowed,
211       /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
212       max_gain_increase_db);
213 
214   apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
215                             target_gain_db - last_gain_db_);
216   apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
217                             gain_change_this_frame_db);
218 
219   // Optimization: avoid calling math functions if gain does not
220   // change.
221   if (gain_change_this_frame_db != 0.f) {
222     gain_applier_.SetGainFactor(
223         DbToRatio(last_gain_db_ + gain_change_this_frame_db));
224   }
225 
226   // Modify `frame` only if not running in "dry run" mode.
227   if (!config_.dry_run) {
228     gain_applier_.ApplyGain(frame);
229   } else {
230     // Copy `frame` so that `ApplyGain()` is called (on a copy).
231     CopyAudio(frame, dry_run_frame_);
232     RTC_DCHECK(!dry_run_channels_.empty());
233     AudioFrameView<float> frame_copy(&dry_run_channels_[0],
234                                      frame.num_channels(),
235                                      frame.samples_per_channel());
236     gain_applier_.ApplyGain(frame_copy);
237   }
238 
239   // Remember that the gain has changed for the next iteration.
240   last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
241   apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
242                             last_gain_db_);
243 
244   // Log every 10 seconds.
245   calls_since_last_gain_log_++;
246   if (calls_since_last_gain_log_ == 1000) {
247     calls_since_last_gain_log_ = 0;
248     RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
249                                 -info.speech_level_dbfs, 0, 100, 101);
250     RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
251                                 -info.noise_rms_dbfs, 0, 100, 101);
252     RTC_HISTOGRAM_COUNTS_LINEAR(
253         "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
254         kHeadroomHistogramMax,
255         kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
256     RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
257                                 last_gain_db_, 0, kGainDbHistogramMax,
258                                 kGainDbHistogramMax + 1);
259     RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
260                      << " | speech_dbfs: " << info.speech_level_dbfs
261                      << " | noise_dbfs: " << info.noise_rms_dbfs
262                      << " | headroom_db: " << info.headroom_db
263                      << " | gain_db: " << last_gain_db_;
264   }
265 }
266 
267 }  // namespace webrtc
268