xref: /aosp_15_r20/external/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/erle_estimator.h"
12 
13 #include <cmath>
14 
15 #include "api/array_view.h"
16 #include "modules/audio_processing/aec3/render_delay_buffer.h"
17 #include "modules/audio_processing/aec3/spectrum_buffer.h"
18 #include "rtc_base/random.h"
19 #include "rtc_base/strings/string_builder.h"
20 #include "test/gtest.h"
21 
22 namespace webrtc {
23 
24 namespace {
25 constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2;
26 constexpr float kTrueErle = 10.f;
27 constexpr float kTrueErleOnsets = 1.0f;
28 constexpr float kEchoPathGain = 3.f;
29 
VerifyErleBands(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,float reference_lf,float reference_hf)30 void VerifyErleBands(
31     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
32     float reference_lf,
33     float reference_hf) {
34   for (size_t ch = 0; ch < erle.size(); ++ch) {
35     std::for_each(
36         erle[ch].begin(), erle[ch].begin() + kLowFrequencyLimit,
37         [reference_lf](float a) { EXPECT_NEAR(reference_lf, a, 0.001); });
38     std::for_each(
39         erle[ch].begin() + kLowFrequencyLimit, erle[ch].end(),
40         [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); });
41   }
42 }
43 
VerifyErle(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,float erle_time_domain,float reference_lf,float reference_hf)44 void VerifyErle(
45     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
46     float erle_time_domain,
47     float reference_lf,
48     float reference_hf) {
49   VerifyErleBands(erle, reference_lf, reference_hf);
50   EXPECT_NEAR(kTrueErle, erle_time_domain, 0.5);
51 }
52 
VerifyErleGreaterOrEqual(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle1,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle2)53 void VerifyErleGreaterOrEqual(
54     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle1,
55     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle2) {
56   for (size_t ch = 0; ch < erle1.size(); ++ch) {
57     for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) {
58       EXPECT_GE(erle1[ch][i], erle2[ch][i]);
59     }
60   }
61 }
62 
FormFarendTimeFrame(Block * x)63 void FormFarendTimeFrame(Block* x) {
64   const std::array<float, kBlockSize> frame = {
65       7459.88, 17209.6, 17383,   20768.9, 16816.7, 18386.3, 4492.83, 9675.85,
66       6665.52, 14808.6, 9342.3,  7483.28, 19261.7, 4145.98, 1622.18, 13475.2,
67       7166.32, 6856.61, 21937,   7263.14, 9569.07, 14919,   8413.32, 7551.89,
68       7848.65, 6011.27, 13080.6, 15865.2, 12656,   17459.6, 4263.93, 4503.03,
69       9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6,
70       11405,   15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8,
71       1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4,
72       12416.2, 16434,   2454.69, 9840.8,  6867.23, 1615.75, 6059.9,  8394.19};
73   for (int band = 0; band < x->NumBands(); ++band) {
74     for (int channel = 0; channel < x->NumChannels(); ++channel) {
75       RTC_DCHECK_GE(kBlockSize, frame.size());
76       std::copy(frame.begin(), frame.end(), x->begin(band, channel));
77     }
78   }
79 }
80 
FormFarendFrame(const RenderBuffer & render_buffer,float erle,std::array<float,kFftLengthBy2Plus1> * X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> E2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> Y2)81 void FormFarendFrame(const RenderBuffer& render_buffer,
82                      float erle,
83                      std::array<float, kFftLengthBy2Plus1>* X2,
84                      rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2,
85                      rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2) {
86   const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer();
87   const int num_render_channels = spectrum_buffer.buffer[0].size();
88   const int num_capture_channels = Y2.size();
89 
90   X2->fill(0.f);
91   for (int ch = 0; ch < num_render_channels; ++ch) {
92     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
93       (*X2)[k] += spectrum_buffer.buffer[spectrum_buffer.write][ch][k] /
94                   num_render_channels;
95     }
96   }
97 
98   for (int ch = 0; ch < num_capture_channels; ++ch) {
99     std::transform(X2->begin(), X2->end(), Y2[ch].begin(),
100                    [](float a) { return a * kEchoPathGain * kEchoPathGain; });
101     std::transform(Y2[ch].begin(), Y2[ch].end(), E2[ch].begin(),
102                    [erle](float a) { return a / erle; });
103   }
104 }
105 
FormNearendFrame(Block * x,std::array<float,kFftLengthBy2Plus1> * X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> E2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> Y2)106 void FormNearendFrame(
107     Block* x,
108     std::array<float, kFftLengthBy2Plus1>* X2,
109     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2,
110     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2) {
111   for (int band = 0; band < x->NumBands(); ++band) {
112     for (int ch = 0; ch < x->NumChannels(); ++ch) {
113       std::fill(x->begin(band, ch), x->end(band, ch), 0.f);
114     }
115   }
116 
117   X2->fill(0.f);
118   for (size_t ch = 0; ch < Y2.size(); ++ch) {
119     Y2[ch].fill(500.f * 1000.f * 1000.f);
120     E2[ch].fill(Y2[ch][0]);
121   }
122 }
123 
GetFilterFreq(size_t delay_headroom_samples,rtc::ArrayView<std::vector<std::array<float,kFftLengthBy2Plus1>>> filter_frequency_response)124 void GetFilterFreq(
125     size_t delay_headroom_samples,
126     rtc::ArrayView<std::vector<std::array<float, kFftLengthBy2Plus1>>>
127         filter_frequency_response) {
128   const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize;
129   for (size_t ch = 0; ch < filter_frequency_response[0].size(); ++ch) {
130     for (auto& block_freq_resp : filter_frequency_response) {
131       block_freq_resp[ch].fill(0.f);
132     }
133 
134     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
135       filter_frequency_response[delay_headroom_blocks][ch][k] = kEchoPathGain;
136     }
137   }
138 }
139 
140 }  // namespace
141 
142 class ErleEstimatorMultiChannel
143     : public ::testing::Test,
144       public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
145 
146 INSTANTIATE_TEST_SUITE_P(MultiChannel,
147                          ErleEstimatorMultiChannel,
148                          ::testing::Combine(::testing::Values(1, 2, 4, 8),
149                                             ::testing::Values(1, 2, 8)));
150 
TEST_P(ErleEstimatorMultiChannel,VerifyErleIncreaseAndHold)151 TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) {
152   const size_t num_render_channels = std::get<0>(GetParam());
153   const size_t num_capture_channels = std::get<1>(GetParam());
154   constexpr int kSampleRateHz = 48000;
155   constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
156 
157   std::array<float, kFftLengthBy2Plus1> X2;
158   std::vector<std::array<float, kFftLengthBy2Plus1>> E2(num_capture_channels);
159   std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
160   std::vector<bool> converged_filters(num_capture_channels, true);
161 
162   EchoCanceller3Config config;
163   config.erle.onset_detection = true;
164 
165   Block x(kNumBands, num_render_channels);
166   std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
167       filter_frequency_response(
168           config.filter.refined.length_blocks,
169           std::vector<std::array<float, kFftLengthBy2Plus1>>(
170               num_capture_channels));
171   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
172       RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
173 
174   GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response);
175 
176   ErleEstimator estimator(0, config, num_capture_channels);
177 
178   FormFarendTimeFrame(&x);
179   render_delay_buffer->Insert(x);
180   render_delay_buffer->PrepareCaptureProcessing();
181   // Verifies that the ERLE estimate is properly increased to higher values.
182   FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2,
183                   Y2);
184   for (size_t k = 0; k < 1000; ++k) {
185     render_delay_buffer->Insert(x);
186     render_delay_buffer->PrepareCaptureProcessing();
187     estimator.Update(*render_delay_buffer->GetRenderBuffer(),
188                      filter_frequency_response, X2, Y2, E2, converged_filters);
189   }
190   VerifyErle(estimator.Erle(/*onset_compensated=*/true),
191              std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
192              config.erle.max_h);
193   VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false),
194                            estimator.Erle(/*onset_compensated=*/true));
195   VerifyErleGreaterOrEqual(estimator.ErleUnbounded(),
196                            estimator.Erle(/*onset_compensated=*/false));
197 
198   FormNearendFrame(&x, &X2, E2, Y2);
199   // Verifies that the ERLE is not immediately decreased during nearend
200   // activity.
201   for (size_t k = 0; k < 50; ++k) {
202     render_delay_buffer->Insert(x);
203     render_delay_buffer->PrepareCaptureProcessing();
204     estimator.Update(*render_delay_buffer->GetRenderBuffer(),
205                      filter_frequency_response, X2, Y2, E2, converged_filters);
206   }
207   VerifyErle(estimator.Erle(/*onset_compensated=*/true),
208              std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
209              config.erle.max_h);
210   VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false),
211                            estimator.Erle(/*onset_compensated=*/true));
212   VerifyErleGreaterOrEqual(estimator.ErleUnbounded(),
213                            estimator.Erle(/*onset_compensated=*/false));
214 }
215 
TEST_P(ErleEstimatorMultiChannel,VerifyErleTrackingOnOnsets)216 TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) {
217   const size_t num_render_channels = std::get<0>(GetParam());
218   const size_t num_capture_channels = std::get<1>(GetParam());
219   constexpr int kSampleRateHz = 48000;
220   constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
221 
222   std::array<float, kFftLengthBy2Plus1> X2;
223   std::vector<std::array<float, kFftLengthBy2Plus1>> E2(num_capture_channels);
224   std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
225   std::vector<bool> converged_filters(num_capture_channels, true);
226   EchoCanceller3Config config;
227   config.erle.onset_detection = true;
228   Block x(kNumBands, num_render_channels);
229   std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
230       filter_frequency_response(
231           config.filter.refined.length_blocks,
232           std::vector<std::array<float, kFftLengthBy2Plus1>>(
233               num_capture_channels));
234   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
235       RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
236 
237   GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response);
238 
239   ErleEstimator estimator(/*startup_phase_length_blocks=*/0, config,
240                           num_capture_channels);
241 
242   FormFarendTimeFrame(&x);
243   render_delay_buffer->Insert(x);
244   render_delay_buffer->PrepareCaptureProcessing();
245 
246   for (size_t burst = 0; burst < 20; ++burst) {
247     FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErleOnsets,
248                     &X2, E2, Y2);
249     for (size_t k = 0; k < 10; ++k) {
250       render_delay_buffer->Insert(x);
251       render_delay_buffer->PrepareCaptureProcessing();
252       estimator.Update(*render_delay_buffer->GetRenderBuffer(),
253                        filter_frequency_response, X2, Y2, E2,
254                        converged_filters);
255     }
256     FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2,
257                     Y2);
258     for (size_t k = 0; k < 1000; ++k) {
259       render_delay_buffer->Insert(x);
260       render_delay_buffer->PrepareCaptureProcessing();
261       estimator.Update(*render_delay_buffer->GetRenderBuffer(),
262                        filter_frequency_response, X2, Y2, E2,
263                        converged_filters);
264     }
265     FormNearendFrame(&x, &X2, E2, Y2);
266     for (size_t k = 0; k < 300; ++k) {
267       render_delay_buffer->Insert(x);
268       render_delay_buffer->PrepareCaptureProcessing();
269       estimator.Update(*render_delay_buffer->GetRenderBuffer(),
270                        filter_frequency_response, X2, Y2, E2,
271                        converged_filters);
272     }
273   }
274   VerifyErleBands(estimator.ErleDuringOnsets(), config.erle.min,
275                   config.erle.min);
276   FormNearendFrame(&x, &X2, E2, Y2);
277   for (size_t k = 0; k < 1000; k++) {
278     estimator.Update(*render_delay_buffer->GetRenderBuffer(),
279                      filter_frequency_response, X2, Y2, E2, converged_filters);
280   }
281   // Verifies that during ne activity, Erle converges to the Erle for
282   // onsets.
283   VerifyErle(estimator.Erle(/*onset_compensated=*/true),
284              std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min,
285              config.erle.min);
286 }
287 
288 }  // namespace webrtc
289