1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
12
13 #include <algorithm>
14
15 #include "common_audio/include/audio_util.h"
16 #include "modules/audio_processing/agc2/agc2_common.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/logging.h"
20 #include "rtc_base/numerics/safe_minmax.h"
21 #include "system_wrappers/include/metrics.h"
22
23 namespace webrtc {
24 namespace {
25
// Shorthand for the adaptive digital configuration type; used by
// `ComputeGainDb()` in this file.
26 using AdaptiveDigitalConfig =
27 AudioProcessing::Config::GainController2::AdaptiveDigital;
28
// Lower and upper bounds passed to the "WebRTC.Audio.Agc2.Headroom" linear
// histogram; the bucket count is derived from them as (max - min + 1).
29 constexpr int kHeadroomHistogramMin = 0;
30 constexpr int kHeadroomHistogramMax = 50;
// Upper bound passed to the "WebRTC.Audio.Agc2.DigitalGainApplied" linear
// histogram (its lower bound is 0 and it uses max + 1 buckets).
31 constexpr int kGainDbHistogramMax = 30;
32
33 // Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
34 // Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
35 // safety margin to allow transient peaks to exceed the target peak level
36 // without clipping.
ComputeGainDb(float input_level_dbfs,const AdaptiveDigitalConfig & config)37 float ComputeGainDb(float input_level_dbfs,
38 const AdaptiveDigitalConfig& config) {
39 // If the level is very low, apply the maximum gain.
40 if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
41 return config.max_gain_db;
42 }
43 // We expect to end up here most of the time: the level is below
44 // -headroom, but we can boost it to -headroom.
45 if (input_level_dbfs < -config.headroom_db) {
46 return -config.headroom_db - input_level_dbfs;
47 }
48 // The level is too high and we can't boost.
49 RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
50 return 0.0f;
51 }
52
53 // Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
54 // does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
55 // `target_gain_db` so that the output noise level equals
56 // `max_output_noise_level_dbfs`.
LimitGainByNoise(float target_gain_db,float input_noise_level_dbfs,float max_output_noise_level_dbfs,ApmDataDumper & apm_data_dumper)57 float LimitGainByNoise(float target_gain_db,
58 float input_noise_level_dbfs,
59 float max_output_noise_level_dbfs,
60 ApmDataDumper& apm_data_dumper) {
61 const float max_allowed_gain_db =
62 max_output_noise_level_dbfs - input_noise_level_dbfs;
63 apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
64 max_allowed_gain_db);
65 return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
66 }
67
LimitGainByLowConfidence(float target_gain_db,float last_gain_db,float limiter_audio_level_dbfs,bool estimate_is_confident)68 float LimitGainByLowConfidence(float target_gain_db,
69 float last_gain_db,
70 float limiter_audio_level_dbfs,
71 bool estimate_is_confident) {
72 if (estimate_is_confident ||
73 limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
74 return target_gain_db;
75 }
76 const float limiter_level_dbfs_before_gain =
77 limiter_audio_level_dbfs - last_gain_db;
78
79 // Compute a new gain so that `limiter_level_dbfs_before_gain` +
80 // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`.
81 const float new_target_gain_db = std::max(
82 kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
83 return std::min(new_target_gain_db, target_gain_db);
84 }
85
86 // Computes how the gain should change during this frame.
87 // Return the gain difference in db to 'last_gain_db'.
ComputeGainChangeThisFrameDb(float target_gain_db,float last_gain_db,bool gain_increase_allowed,float max_gain_decrease_db,float max_gain_increase_db)88 float ComputeGainChangeThisFrameDb(float target_gain_db,
89 float last_gain_db,
90 bool gain_increase_allowed,
91 float max_gain_decrease_db,
92 float max_gain_increase_db) {
93 RTC_DCHECK_GT(max_gain_decrease_db, 0);
94 RTC_DCHECK_GT(max_gain_increase_db, 0);
95 float target_gain_difference_db = target_gain_db - last_gain_db;
96 if (!gain_increase_allowed) {
97 target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
98 }
99 return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
100 max_gain_increase_db);
101 }
102
103 // Copies the (multichannel) audio samples from `src` into `dst`.
CopyAudio(AudioFrameView<const float> src,std::vector<std::vector<float>> & dst)104 void CopyAudio(AudioFrameView<const float> src,
105 std::vector<std::vector<float>>& dst) {
106 RTC_DCHECK_GT(src.num_channels(), 0);
107 RTC_DCHECK_GT(src.samples_per_channel(), 0);
108 RTC_DCHECK_EQ(dst.size(), src.num_channels());
109 for (int c = 0; c < src.num_channels(); ++c) {
110 rtc::ArrayView<const float> channel_view = src.channel(c);
111 RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
112 RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
113 std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
114 }
115 }
116
117 } // namespace
118
AdaptiveDigitalGainApplier(ApmDataDumper * apm_data_dumper,const AudioProcessing::Config::GainController2::AdaptiveDigital & config,int sample_rate_hz,int num_channels)119 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
120 ApmDataDumper* apm_data_dumper,
121 const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
122 int sample_rate_hz,
123 int num_channels)
124 : apm_data_dumper_(apm_data_dumper),
125 gain_applier_(
126 /*hard_clip_samples=*/false,
127 /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
128 config_(config),
129 max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
130 kFrameDurationMs / 1000.0f),
131 calls_since_last_gain_log_(0),
132 frames_to_gain_increase_allowed_(
133 config_.adjacent_speech_frames_threshold),
134 last_gain_db_(config_.initial_gain_db) {
135 RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
136 RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
137 RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
138 RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
139 Initialize(sample_rate_hz, num_channels);
140 }
141
Initialize(int sample_rate_hz,int num_channels)142 void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
143 int num_channels) {
144 if (!config_.dry_run) {
145 return;
146 }
147 RTC_DCHECK_GT(sample_rate_hz, 0);
148 RTC_DCHECK_GT(num_channels, 0);
149 int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
150 bool sample_rate_changed =
151 dry_run_frame_.empty() || // Handle initialization.
152 dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
153 bool num_channels_changed =
154 dry_run_channels_.size() != static_cast<size_t>(num_channels);
155 if (sample_rate_changed || num_channels_changed) {
156 // Resize the multichannel audio vector and update the channel pointers.
157 dry_run_frame_.resize(num_channels);
158 dry_run_channels_.resize(num_channels);
159 for (int c = 0; c < num_channels; ++c) {
160 dry_run_frame_[c].resize(frame_size);
161 dry_run_channels_[c] = dry_run_frame_[c].data();
162 }
163 }
164 }
165
Process(const FrameInfo & info,AudioFrameView<float> frame)166 void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
167 AudioFrameView<float> frame) {
168 RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
169 RTC_DCHECK_GE(frame.num_channels(), 1);
170 RTC_DCHECK(
171 frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
172 frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
173 << "`frame` does not look like a 10 ms frame for an APM supported sample "
174 "rate";
175
176 // Compute the input level used to select the desired gain.
177 RTC_DCHECK_GT(info.headroom_db, 0.0f);
178 const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
179
180 const float target_gain_db = LimitGainByLowConfidence(
181 LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
182 info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
183 *apm_data_dumper_),
184 last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
185
186 // Forbid increasing the gain until enough adjacent speech frames are
187 // observed.
188 bool first_confident_speech_frame = false;
189 if (info.speech_probability < kVadConfidenceThreshold) {
190 frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
191 } else if (frames_to_gain_increase_allowed_ > 0) {
192 frames_to_gain_increase_allowed_--;
193 first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
194 }
195 apm_data_dumper_->DumpRaw(
196 "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
197 frames_to_gain_increase_allowed_);
198
199 const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
200
201 float max_gain_increase_db = max_gain_change_db_per_10ms_;
202 if (first_confident_speech_frame) {
203 // No gain increase happened while waiting for a long enough speech
204 // sequence. Therefore, temporarily allow a faster gain increase.
205 RTC_DCHECK(gain_increase_allowed);
206 max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
207 }
208
209 const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
210 target_gain_db, last_gain_db_, gain_increase_allowed,
211 /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
212 max_gain_increase_db);
213
214 apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
215 target_gain_db - last_gain_db_);
216 apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
217 gain_change_this_frame_db);
218
219 // Optimization: avoid calling math functions if gain does not
220 // change.
221 if (gain_change_this_frame_db != 0.f) {
222 gain_applier_.SetGainFactor(
223 DbToRatio(last_gain_db_ + gain_change_this_frame_db));
224 }
225
226 // Modify `frame` only if not running in "dry run" mode.
227 if (!config_.dry_run) {
228 gain_applier_.ApplyGain(frame);
229 } else {
230 // Copy `frame` so that `ApplyGain()` is called (on a copy).
231 CopyAudio(frame, dry_run_frame_);
232 RTC_DCHECK(!dry_run_channels_.empty());
233 AudioFrameView<float> frame_copy(&dry_run_channels_[0],
234 frame.num_channels(),
235 frame.samples_per_channel());
236 gain_applier_.ApplyGain(frame_copy);
237 }
238
239 // Remember that the gain has changed for the next iteration.
240 last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
241 apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
242 last_gain_db_);
243
244 // Log every 10 seconds.
245 calls_since_last_gain_log_++;
246 if (calls_since_last_gain_log_ == 1000) {
247 calls_since_last_gain_log_ = 0;
248 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
249 -info.speech_level_dbfs, 0, 100, 101);
250 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
251 -info.noise_rms_dbfs, 0, 100, 101);
252 RTC_HISTOGRAM_COUNTS_LINEAR(
253 "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
254 kHeadroomHistogramMax,
255 kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
256 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
257 last_gain_db_, 0, kGainDbHistogramMax,
258 kGainDbHistogramMax + 1);
259 RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
260 << " | speech_dbfs: " << info.speech_level_dbfs
261 << " | noise_dbfs: " << info.noise_rms_dbfs
262 << " | headroom_db: " << info.headroom_db
263 << " | gain_db: " << last_gain_db_;
264 }
265 }
266
267 } // namespace webrtc
268