/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"

#include <algorithm>
#include <cmath>
#include <memory>

#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/vector_float_frame.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
22
23 namespace webrtc {
24 namespace {
25
// Channel counts used by the tests.
constexpr int kMono = 1;
constexpr int kStereo = 2;

// Number of samples per channel in a 10 ms frame at 8 kHz and at 48 kHz.
constexpr int kFrameLen10ms8kHz = 8000 / 100;
constexpr int kFrameLen10ms48kHz = 48000 / 100;

// Highest speech probability, used to simulate frames detected as speech.
constexpr float kMaxSpeechProbability = 1.0f;
32
33 // Constants used in place of estimated noise levels.
34 constexpr float kNoNoiseDbfs = kMinLevelDbfs;
35 constexpr float kWithNoiseDbfs = -20.0f;
36
37 // Number of additional frames to process in the tests to ensure that the tested
38 // adaptation processes have converged.
39 constexpr int kNumExtraFrames = 10;
40
GetMaxGainChangePerFrameDb(float max_gain_change_db_per_second)41 constexpr float GetMaxGainChangePerFrameDb(
42 float max_gain_change_db_per_second) {
43 return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
44 }
45
46 using AdaptiveDigitalConfig =
47 AudioProcessing::Config::GainController2::AdaptiveDigital;
48
49 constexpr AdaptiveDigitalConfig kDefaultConfig{};
50
51 // Helper to create initialized `AdaptiveDigitalGainApplier` objects.
52 struct GainApplierHelper {
GainApplierHelperwebrtc::__anon4e31bdb60111::GainApplierHelper53 GainApplierHelper(const AdaptiveDigitalConfig& config,
54 int sample_rate_hz,
55 int num_channels)
56 : apm_data_dumper(0),
57 gain_applier(
58 std::make_unique<AdaptiveDigitalGainApplier>(&apm_data_dumper,
59 config,
60 sample_rate_hz,
61 num_channels)) {}
62 ApmDataDumper apm_data_dumper;
63 std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
64 };
65
66 // Returns a `FrameInfo` sample to simulate noiseless speech detected with
67 // maximum probability and with level, headroom and limiter envelope chosen
68 // so that the resulting gain equals the default initial adaptive digital gain
69 // i.e., no gain adaptation is expected.
GetFrameInfoToNotAdapt(const AdaptiveDigitalConfig & config)70 AdaptiveDigitalGainApplier::FrameInfo GetFrameInfoToNotAdapt(
71 const AdaptiveDigitalConfig& config) {
72 AdaptiveDigitalGainApplier::FrameInfo info;
73 info.speech_probability = kMaxSpeechProbability;
74 info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
75 info.speech_level_reliable = true;
76 info.noise_rms_dbfs = kNoNoiseDbfs;
77 info.headroom_db = config.headroom_db;
78 info.limiter_envelope_dbfs = -2.0f;
79 return info;
80 }
81
// Smoke test: processing one stereo 48 kHz frame must complete.
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
  // A single call with reasonable audio level values and settings.
  const AdaptiveDigitalGainApplier::FrameInfo frame_info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  VectorFloatFrame audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
  helper.gain_applier->Process(frame_info, audio.float_frame_view());
}
89
90 // Checks that the maximum allowed gain is applied.
TEST(GainController2AdaptiveGainApplier,MaxGainApplied)91 TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
92 constexpr int kNumFramesToAdapt =
93 static_cast<int>(kDefaultConfig.max_gain_db /
94 GetMaxGainChangePerFrameDb(
95 kDefaultConfig.max_gain_change_db_per_second)) +
96 kNumExtraFrames;
97
98 GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
99 AdaptiveDigitalGainApplier::FrameInfo info =
100 GetFrameInfoToNotAdapt(kDefaultConfig);
101 info.speech_level_dbfs = -60.0f;
102 float applied_gain;
103 for (int i = 0; i < kNumFramesToAdapt; ++i) {
104 VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
105 helper.gain_applier->Process(info, fake_audio.float_frame_view());
106 applied_gain = fake_audio.float_frame_view().channel(0)[0];
107 }
108 const float applied_gain_db = 20.0f * std::log10f(applied_gain);
109 EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f);
110 }
111
// Checks that the gain observed on consecutive frames does not change by more
// than the allowed amount, both while the gain increases and while it
// decreases.
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  constexpr float kMaxGainChangeDbPerFrame =
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
  // `initial_level_dbfs` is negative: it is negated so that the frame count is
  // positive. A negative count would turn the checks below into no-ops.
  constexpr int kNumFramesToAdapt =
      static_cast<int>(-initial_level_dbfs / kMaxGainChangeDbPerFrame) +
      kNumExtraFrames;
  static_assert(kNumFramesToAdapt > 0,
                "The gain change checks must process at least one frame.");

  // NOTE(review): this bound is a linear ratio that is compared with a linear
  // gain difference, which makes the check looser than the dB limit it is
  // derived from. Consider comparing gain ratios instead.
  const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame);

  // The applier is expected to start from the configured initial gain (the
  // same assumption is made in `GainIsRampedInAFrame`).
  float last_gain_linear = DbToRatio(kDefaultConfig.initial_gain_db);

  // Processes `kNumFramesToAdapt` frames with the given speech level and
  // checks the gain change between each pair of consecutive frames.
  const auto process_and_check = [&](float speech_level_dbfs) {
    for (int i = 0; i < kNumFramesToAdapt; ++i) {
      SCOPED_TRACE(i);
      // The input samples equal 1, hence the first output sample equals the
      // gain applied to it.
      VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
      AdaptiveDigitalGainApplier::FrameInfo info =
          GetFrameInfoToNotAdapt(kDefaultConfig);
      info.speech_level_dbfs = speech_level_dbfs;
      helper.gain_applier->Process(info, fake_audio.float_frame_view());
      const float current_gain_linear =
          fake_audio.float_frame_view().channel(0)[0];
      EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
                max_change_per_frame_linear);
      last_gain_linear = current_gain_linear;
    }
  };

  // Low speech level: the gain is expected to increase.
  process_and_check(initial_level_dbfs);
  // Check that the same is true when gain decreases as well.
  process_and_check(0.f);
}
152
// Checks that, within one processed frame, the difference between adjacent
// output samples stays below the per-sample share of the per-frame bound.
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);

  constexpr float kSpeechLevelDbfs = -25.0f;
  constexpr float kInputSample = 1.0f;

  AdaptiveDigitalGainApplier::FrameInfo frame_info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  frame_info.speech_level_dbfs = kSpeechLevelDbfs;
  VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, kInputSample);
  helper.gain_applier->Process(frame_info, audio.float_frame_view());

  // Walk the output samples, starting from the value that the input sample
  // takes once the initial gain is applied, and track the largest step.
  float previous_sample = kInputSample * DbToRatio(kDefaultConfig.initial_gain_db);
  float largest_step = 0.0f;
  for (const float sample : audio.float_frame_view().channel(0)) {
    const float step = std::abs(sample - previous_sample);
    if (largest_step < step) {
      largest_step = step;
    }
    previous_sample = sample;
  }

  const float max_gain_change_per_frame_db =
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
  const float max_change_per_frame_linear =
      DbToRatio(max_gain_change_per_frame_db);
  const float max_change_per_sample =
      max_change_per_frame_linear / kFrameLen10ms48kHz;

  EXPECT_LE(largest_step, max_change_per_sample);
}
178
// Checks that, when the input noise level is `kWithNoiseDbfs`, the output of a
// unit-amplitude input settles at 1 after the warm-up frames.
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);

  constexpr float kSpeechLevelDbfs = -25.0f;
  // Frames needed to sweep `initial_gain_db` at the maximum allowed rate.
  constexpr int kNumWarmUpFrames = static_cast<int>(
      kDefaultConfig.initial_gain_db /
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second));
  constexpr int kNumCheckedFrames = 50;
  constexpr int kNumFramesTotal = kNumWarmUpFrames + kNumCheckedFrames;

  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
      << "kWithNoiseDbfs is too low";

  // The same frame info is used for every frame.
  AdaptiveDigitalGainApplier::FrameInfo frame_info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  frame_info.speech_level_dbfs = kSpeechLevelDbfs;
  frame_info.noise_rms_dbfs = kWithNoiseDbfs;

  for (int frame = 0; frame < kNumFramesTotal; ++frame) {
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(frame_info, audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (frame <= kNumWarmUpFrames) {
      continue;
    }
    auto samples = audio.float_frame_view().channel(0);
    const float largest_sample =
        *std::max_element(samples.begin(), samples.end());
    EXPECT_NEAR(largest_sample, 1.0f, 0.001f);
  }
}
209
// Smoke test: a frame whose speech level is above 0 dBFS must be processed.
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);

  // Frame info with a positive speech level.
  AdaptiveDigitalGainApplier::FrameInfo frame_info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  frame_info.speech_level_dbfs = 5.0f;

  // Make one call with positive audio level values and settings.
  VectorFloatFrame audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
  helper.gain_applier->Process(frame_info, audio.float_frame_view());
}
220
// Checks that, when the speech level is flagged as unreliable and the limiter
// envelope is above 0 dBFS, the output of a unit-amplitude input settles at 1
// after the warm-up frames.
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  // Frames needed to sweep `initial_gain_db` at the maximum allowed rate. The
  // conversion to an integer frame count is made explicit.
  constexpr int num_initial_frames = static_cast<int>(
      kDefaultConfig.initial_gain_db /
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second));
  constexpr int num_frames = 50;

  // No precondition on `kWithNoiseDbfs` is checked here: this test keeps the
  // noise level returned by `GetFrameInfoToNotAdapt()` and never uses that
  // constant.

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
    AdaptiveDigitalGainApplier::FrameInfo info =
        GetFrameInfoToNotAdapt(kDefaultConfig);
    info.speech_level_dbfs = initial_level_dbfs;
    info.limiter_envelope_dbfs = 1.0f;
    info.speech_level_reliable = false;
    helper.gain_applier->Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
      const float maximal_ratio =
          *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
                            fake_audio.float_frame_view().channel(0).end());

      EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
    }
  }
}
252
253 class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
254 protected:
adjacent_speech_frames_threshold() const255 int adjacent_speech_frames_threshold() const { return GetParam(); }
256 };
257
// Checks that the observed gain stays the same while the number of processed
// speech frames has not exceeded the configured threshold.
TEST_P(AdaptiveDigitalGainApplierTest,
       DoNotIncreaseGainWithTooFewSpeechFrames) {
  AdaptiveDigitalConfig config;
  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
  GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);

  // Lower the speech level so that the target gain will be increased.
  AdaptiveDigitalGainApplier::FrameInfo frame_info =
      GetFrameInfoToNotAdapt(config);
  frame_info.speech_level_dbfs -= 12.0f;

  const int num_frames = config.adjacent_speech_frames_threshold;
  float previous_gain = 0.0f;
  for (int frame = 0; frame < num_frames; ++frame) {
    SCOPED_TRACE(frame);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(frame_info, audio.float_frame_view());
    const float current_gain = audio.float_frame_view().channel(0)[0];
    // The first frame only provides the reference gain.
    if (frame != 0) {
      EXPECT_EQ(previous_gain, current_gain);  // No gain increase applied.
    }
    previous_gain = current_gain;
  }
}
280
// Checks that the observed gain increases on the frame that follows
// `adjacent_speech_frames_threshold` processed speech frames.
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
  AdaptiveDigitalConfig config;
  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
  GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);

  // Lower the speech level so that the target gain will be increased.
  AdaptiveDigitalGainApplier::FrameInfo frame_info =
      GetFrameInfoToNotAdapt(config);
  frame_info.speech_level_dbfs -= 12.0f;

  // Process as many speech frames as the threshold and remember the gain
  // observed on the last one.
  const int num_frames = config.adjacent_speech_frames_threshold;
  float gain_before = 0.0f;
  for (int frame = 0; frame < num_frames; ++frame) {
    SCOPED_TRACE(frame);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(frame_info, audio.float_frame_view());
    gain_before = audio.float_frame_view().channel(0)[0];
  }

  // Process one more speech frame.
  VectorFloatFrame extra_audio(kMono, kFrameLen10ms48kHz, 1.0f);
  helper.gain_applier->Process(frame_info, extra_audio.float_frame_view());
  const float gain_after = extra_audio.float_frame_view().channel(0)[0];

  // An increased gain has been applied.
  EXPECT_GT(gain_after, gain_before);
}
305
306 INSTANTIATE_TEST_SUITE_P(GainController2,
307 AdaptiveDigitalGainApplierTest,
308 ::testing::Values(1, 7, 31));
309
310 // Checks that the input is never modified when running in dry run mode.
TEST(GainController2GainApplier,DryRunDoesNotChangeInput)311 TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
312 AdaptiveDigitalConfig config;
313 config.dry_run = true;
314 GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
315
316 // Simulate an input signal with log speech level.
317 AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
318 info.speech_level_dbfs = -60.0f;
319 const int num_frames_to_adapt =
320 static_cast<int>(
321 config.max_gain_db /
322 GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
323 kNumExtraFrames;
324 constexpr float kPcmSamples = 123.456f;
325 // Run the gain applier and check that the PCM samples are not modified.
326 for (int i = 0; i < num_frames_to_adapt; ++i) {
327 SCOPED_TRACE(i);
328 VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
329 helper.gain_applier->Process(info, fake_audio.float_frame_view());
330 EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
331 }
332 }
333
334 // Checks that no sample is modified before and after the sample rate changes.
TEST(GainController2GainApplier,DryRunHandlesSampleRateChange)335 TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
336 AdaptiveDigitalConfig config;
337 config.dry_run = true;
338 GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
339
340 AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
341 info.speech_level_dbfs = -60.0f;
342 constexpr float kPcmSamples = 123.456f;
343 VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
344 helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
345 EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
346 helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
347 VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
348 helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
349 EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
350 }
351
352 // Checks that no sample is modified before and after the number of channels
353 // changes.
TEST(GainController2GainApplier,DryRunHandlesNumChannelsChange)354 TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
355 AdaptiveDigitalConfig config;
356 config.dry_run = true;
357 GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
358
359 AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
360 info.speech_level_dbfs = -60.0f;
361 constexpr float kPcmSamples = 123.456f;
362 VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
363 helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
364 EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
365 VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
366 helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
367 helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
368 EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
369 EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
370 }
371
372 } // namespace
373 } // namespace webrtc
374