1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/aecm/aecm_core.h"
12 #include "modules/audio_processing/aecm/echo_control_mobile.h"
13 #include "modules/audio_processing/utility/delay_estimator_wrapper.h"
14 #include "rtc_base/checks.h"
15 #include "rtc_base/numerics/safe_conversions.h"
16
17 namespace webrtc {
18
// File-local lookup tables and tuning constants used by the MIPS routines
// below.
19 namespace {
20
// Window table with 65 entries rising from 0 to 16384. WindowAndFFT and
// InverseFFTAndWindow walk it upward from the first entry for the first half
// of a block and downward from the last entry for the second half. The users
// scale the products by 2^-14, so 16384 acts as unity gain.
21 static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
22 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, 3562, 3951,
23 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019,
24 8364, 8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
25 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189,
26 14384, 14571, 14749, 14918, 15079, 15231, 15373, 15506, 15631, 15746, 15851,
27 15947, 16034, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384};
28
// NOTE(review): these two constants are presumably consumed by ComfortNoise();
// confirm at the use site.
29 static const int16_t kNoiseEstQDomain = 15;
30 static const int16_t kNoiseEstIncCount = 5;
31
// Byte offsets (128 entries, all multiples of 4) into the int16_t FFT work
// buffer. WindowAndFFT reads one pair per loop iteration: the first entry is
// the store position for the windowed sample k, the second entry the store
// position for the windowed sample k + 64. Because the buffer is zeroed first
// and only these 4-byte-aligned slots are written, the int16_t following each
// slot stays zero.
// NOTE(review): the ordering looks like the bit-reversed input permutation
// expected by WebRtcSpl_ComplexFFT -- confirm. The table is never written in
// the code shown here, so it could probably be const.
32 static int16_t coefTable[] = {
33 0, 4, 256, 260, 128, 132, 384, 388, 64, 68, 320, 324, 192, 196, 448,
34 452, 32, 36, 288, 292, 160, 164, 416, 420, 96, 100, 352, 356, 224, 228,
35 480, 484, 16, 20, 272, 276, 144, 148, 400, 404, 80, 84, 336, 340, 208,
36 212, 464, 468, 48, 52, 304, 308, 176, 180, 432, 436, 112, 116, 368, 372,
37 240, 244, 496, 500, 8, 12, 264, 268, 136, 140, 392, 396, 72, 76, 328,
38 332, 200, 204, 456, 460, 40, 44, 296, 300, 168, 172, 424, 428, 104, 108,
39 360, 364, 232, 236, 488, 492, 24, 28, 280, 284, 152, 156, 408, 412, 88,
40 92, 344, 348, 216, 220, 472, 476, 56, 60, 312, 316, 184, 188, 440, 444,
41 120, 124, 376, 380, 248, 252, 504, 508};
42
// Byte offsets (128 entries) into the FFT work buffer used by
// InverseFFTAndWindow. One pair is consumed per input bin: the bin is stored
// with its imaginary part negated at the first offset of the pair and
// unchanged at the second offset. The second entry of the first pair is 512,
// i.e. int16_t index 256, so the work buffer must extend past index 255.
43 static int16_t coefTable_ifft[] = {
44 0, 512, 256, 508, 128, 252, 384, 380, 64, 124, 320, 444, 192, 188, 448,
45 316, 32, 60, 288, 476, 160, 220, 416, 348, 96, 92, 352, 412, 224, 156,
46 480, 284, 16, 28, 272, 492, 144, 236, 400, 364, 80, 108, 336, 428, 208,
47 172, 464, 300, 48, 44, 304, 460, 176, 204, 432, 332, 112, 76, 368, 396,
48 240, 140, 496, 268, 8, 12, 264, 500, 136, 244, 392, 372, 72, 116, 328,
49 436, 200, 180, 456, 308, 40, 52, 296, 468, 168, 212, 424, 340, 104, 84,
50 360, 404, 232, 148, 488, 276, 24, 20, 280, 484, 152, 228, 408, 356, 88,
51 100, 344, 420, 216, 164, 472, 292, 56, 36, 312, 452, 184, 196, 440, 324,
52 120, 68, 376, 388, 248, 132, 504, 260};
53
54 } // namespace
55
// Forward declaration only; being static, ComfortNoise must be defined
// elsewhere in this translation unit.
// NOTE(review): parameter roles (dfa = near-end magnitude spectrum, out =
// spectrum that receives the noise, lambda = per-bin gain) are inferred from
// the names -- confirm against the definition.
56 static void ComfortNoise(AecmCore* aecm,
57 const uint16_t* dfa,
58 ComplexInt16* out,
59 const int16_t* lambda);
60
// Windows a block of time-domain samples, runs the forward complex FFT and
// writes the conjugated result to freq_signal.
//
// aecm                 [in]     Not referenced by this function.
// fft                  [in/out] Work buffer; PART_LEN4 int16_t are zeroed and
//                               then used as FFT input/output.
// time_signal          [in]     Samples 0..127 are read.
// freq_signal          [out]    128 int16_t (64 real/imag pairs) are written.
// time_signal_scaling  [in]     Each windowed product is shifted by
//                               (time_signal_scaling - 14): left when that
//                               value is >= 0, arithmetic right otherwise.
//
// The loop counts (64 and 128) are hard-coded in the assembly.
// NOTE(review): presumably they equal PART_LEN and PART_LEN2 -- confirm.
// Both asm blocks use ".set noreorder", so the instruction written directly
// after each branch occupies the branch delay slot and always executes.
WindowAndFFT(AecmCore * aecm,int16_t * fft,const int16_t * time_signal,ComplexInt16 * freq_signal,int time_signal_scaling)61 static void WindowAndFFT(AecmCore* aecm,
62 int16_t* fft,
63 const int16_t* time_signal,
64 ComplexInt16* freq_signal,
65 int time_signal_scaling) {
66 int i, j;
67 int32_t tmp1, tmp2, tmp3, tmp4;
68 int16_t* pfrfi;
69 ComplexInt16* pfreq_signal;
70 int16_t f_coef, s_coef;
// Pointer values are held in int32_t variables here, which only works on a
// 32-bit target.
71 int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
72 int32_t hann, hann1, coefs;
73
// Only the slots addressed through coefTable are written below; everything
// else (including every second int16_t) must start out as zero.
74 memset(fft, 0, sizeof(int16_t) * PART_LEN4);
75
76 // FFT of signal
// Per iteration k (64 iterations):
//   tmp1 = time_signal[k]      * WebRtcAecm_kSqrtHanning[k]
//   tmp3 = time_signal[k + 64] * WebRtcAecm_kSqrtHanning[64 - k]
// both shifted by `shift` and stored (low 16 bits) at the byte offsets
// coefTable[2k] and coefTable[2k + 1] inside fft.
77 __asm __volatile(
78 ".set push \n\t"
79 ".set noreorder \n\t"
80 "addiu %[shift], %[time_signal_scaling], -14 \n\t"
81 "addiu %[i], $zero, 64 \n\t"
82 "addiu %[load_ptr], %[time_signal], 0 \n\t"
83 "addiu %[hann], %[hanning], 0 \n\t"
// hann1 starts 128 bytes (64 entries) into the table, i.e. at its last entry.
84 "addiu %[hann1], %[hanning], 128 \n\t"
85 "addiu %[coefs], %[coefTable], 0 \n\t"
// Negative shift -> loop "2" (right shift by shift1 = -shift); otherwise fall
// through into loop "1" (left shift). The negu sits in the delay slot.
86 "bltz %[shift], 2f \n\t"
87 " negu %[shift1], %[shift] \n\t"
88 "1: "
89 "\n\t"
90 "lh %[tmp1], 0(%[load_ptr]) \n\t"
91 "lh %[tmp2], 0(%[hann]) \n\t"
92 "lh %[tmp3], 128(%[load_ptr]) \n\t"
93 "lh %[tmp4], 0(%[hann1]) \n\t"
94 "addiu %[i], %[i], -1 \n\t"
95 "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
96 "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
97 "lh %[f_coef], 0(%[coefs]) \n\t"
98 "lh %[s_coef], 2(%[coefs]) \n\t"
99 "addiu %[load_ptr], %[load_ptr], 2 \n\t"
100 "addiu %[hann], %[hann], 2 \n\t"
101 "addiu %[hann1], %[hann1], -2 \n\t"
102 "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
103 "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
104 "sllv %[tmp1], %[tmp1], %[shift] \n\t"
105 "sllv %[tmp3], %[tmp3], %[shift] \n\t"
106 "sh %[tmp1], 0(%[store_ptr1]) \n\t"
107 "sh %[tmp3], 0(%[store_ptr2]) \n\t"
108 "bgtz %[i], 1b \n\t"
109 " addiu %[coefs], %[coefs], 4 \n\t"
110 "b 3f \n\t"
111 " nop \n\t"
// Same loop body as above, but with an arithmetic right shift by shift1.
112 "2: "
113 "\n\t"
114 "lh %[tmp1], 0(%[load_ptr]) \n\t"
115 "lh %[tmp2], 0(%[hann]) \n\t"
116 "lh %[tmp3], 128(%[load_ptr]) \n\t"
117 "lh %[tmp4], 0(%[hann1]) \n\t"
118 "addiu %[i], %[i], -1 \n\t"
119 "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
120 "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
121 "lh %[f_coef], 0(%[coefs]) \n\t"
122 "lh %[s_coef], 2(%[coefs]) \n\t"
123 "addiu %[load_ptr], %[load_ptr], 2 \n\t"
124 "addiu %[hann], %[hann], 2 \n\t"
125 "addiu %[hann1], %[hann1], -2 \n\t"
126 "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
127 "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
128 "srav %[tmp1], %[tmp1], %[shift1] \n\t"
129 "srav %[tmp3], %[tmp3], %[shift1] \n\t"
130 "sh %[tmp1], 0(%[store_ptr1]) \n\t"
131 "sh %[tmp3], 0(%[store_ptr2]) \n\t"
132 "bgtz %[i], 2b \n\t"
133 " addiu %[coefs], %[coefs], 4 \n\t"
134 "3: "
135 "\n\t"
136 ".set pop \n\t"
137 : [load_ptr] "=&r"(load_ptr), [shift] "=&r"(shift), [hann] "=&r"(hann),
138 [hann1] "=&r"(hann1), [shift1] "=&r"(shift1), [coefs] "=&r"(coefs),
139 [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
140 [tmp4] "=&r"(tmp4), [i] "=&r"(i), [f_coef] "=&r"(f_coef),
141 [s_coef] "=&r"(s_coef), [store_ptr1] "=&r"(store_ptr1),
142 [store_ptr2] "=&r"(store_ptr2)
143 : [time_signal] "r"(time_signal), [coefTable] "r"(coefTable),
144 [time_signal_scaling] "r"(time_signal_scaling),
145 [hanning] "r"(WebRtcAecm_kSqrtHanning), [fft] "r"(fft)
146 : "memory", "hi", "lo");
147
148 WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
149 pfrfi = fft;
150 pfreq_signal = freq_signal;
151
// Copy 128 int16_t from fft to freq_signal, 8 per iteration. The values at
// even positions (byte offsets 0, 4, 8, 12) are copied unchanged, the values
// at odd positions (offsets 2, 6, 10, 14) are negated, i.e. the spectrum is
// conjugated.
// NOTE(review): assumes ComplexInt16 is laid out as {int16_t real; int16_t
// imag} -- confirm.
152 __asm __volatile(
153 ".set push "
154 "\n\t"
155 ".set noreorder "
156 "\n\t"
157 "addiu %[j], $zero, 128 "
158 "\n\t"
159 "1: "
160 "\n\t"
161 "lh %[tmp1], 0(%[pfrfi]) "
162 "\n\t"
163 "lh %[tmp2], 2(%[pfrfi]) "
164 "\n\t"
165 "lh %[tmp3], 4(%[pfrfi]) "
166 "\n\t"
167 "lh %[tmp4], 6(%[pfrfi]) "
168 "\n\t"
169 "subu %[tmp2], $zero, %[tmp2] "
170 "\n\t"
171 "sh %[tmp1], 0(%[pfreq_signal]) "
172 "\n\t"
173 "sh %[tmp2], 2(%[pfreq_signal]) "
174 "\n\t"
175 "subu %[tmp4], $zero, %[tmp4] "
176 "\n\t"
177 "sh %[tmp3], 4(%[pfreq_signal]) "
178 "\n\t"
179 "sh %[tmp4], 6(%[pfreq_signal]) "
180 "\n\t"
181 "lh %[tmp1], 8(%[pfrfi]) "
182 "\n\t"
183 "lh %[tmp2], 10(%[pfrfi]) "
184 "\n\t"
185 "lh %[tmp3], 12(%[pfrfi]) "
186 "\n\t"
187 "lh %[tmp4], 14(%[pfrfi]) "
188 "\n\t"
189 "addiu %[j], %[j], -8 "
190 "\n\t"
191 "subu %[tmp2], $zero, %[tmp2] "
192 "\n\t"
193 "sh %[tmp1], 8(%[pfreq_signal]) "
194 "\n\t"
195 "sh %[tmp2], 10(%[pfreq_signal]) "
196 "\n\t"
197 "subu %[tmp4], $zero, %[tmp4] "
198 "\n\t"
199 "sh %[tmp3], 12(%[pfreq_signal]) "
200 "\n\t"
201 "sh %[tmp4], 14(%[pfreq_signal]) "
202 "\n\t"
203 "addiu %[pfreq_signal], %[pfreq_signal], 16 "
204 "\n\t"
205 "bgtz %[j], 1b "
206 "\n\t"
207 " addiu %[pfrfi], %[pfrfi], 16 "
208 "\n\t"
209 ".set pop "
210 "\n\t"
211 : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
212 [j] "=&r"(j), [pfrfi] "+r"(pfrfi), [pfreq_signal] "+r"(pfreq_signal),
213 [tmp4] "=&r"(tmp4)
214 :
215 : "memory");
216 }
217
// Rebuilds the full spectrum from efw, runs the inverse complex FFT, applies
// the synthesis window with overlap-add against aecm->outBuf, and finally
// shifts the time-domain history buffers by one block.
//
// aecm          [in/out] outBuf is read and rewritten; dfaCleanQDomain is
//                        read; xBuf, dBufNoisy and (optionally) dBufClean are
//                        shifted.
// fft           [in/out] Work buffer. The first loop stores at byte offset
//                        512 (coefTable_ifft[1]), i.e. int16_t indices 256
//                        and 257, so the buffer needs room for at least 258
//                        int16_t.
// efw           [in]     Bins 0..63 are read by the first loop, bin PART_LEN
//                        afterwards.
// output        [out]    64 samples are written.
// nearendClean  [in]     Only compared against NULL: dBufClean is shifted
//                        only when it is non-NULL.
//
// The loop counts (64 and 128) are hard-coded in the assembly.
// NOTE(review): presumably they equal PART_LEN and PART_LEN2 -- confirm.
// All asm blocks use ".set noreorder", so the instruction written directly
// after each branch occupies the branch delay slot and always executes.
InverseFFTAndWindow(AecmCore * aecm,int16_t * fft,ComplexInt16 * efw,int16_t * output,const int16_t * nearendClean)218 static void InverseFFTAndWindow(AecmCore* aecm,
219 int16_t* fft,
220 ComplexInt16* efw,
221 int16_t* output,
222 const int16_t* nearendClean) {
223 int i, outCFFT;
224 int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;
225 int16_t* pcoefTable_ifft = coefTable_ifft;
226 int16_t* pfft = fft;
227 int16_t* ppfft = fft;
228 ComplexInt16* pefw = efw;
229 int32_t out_aecm;
230 int16_t* paecm_buf = aecm->outBuf;
// p_kSqrtHanning walks the window upward from its first entry,
// pp_kSqrtHanning walks it downward from entry PART_LEN.
231 const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;
232 const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];
233 int16_t* output1 = output;
234
// Step 1: scatter 64 bins of efw into fft, 4 bins per iteration. For each
// bin the table pair (tmp1, tmp2) gives two byte offsets: (re, im) is stored
// at fft + tmp2 and (re, -im) at fft + tmp1.
235 __asm __volatile(
236 ".set push "
237 "\n\t"
238 ".set noreorder "
239 "\n\t"
240 "addiu %[i], $zero, 64 "
241 "\n\t"
242 "1: "
243 "\n\t"
244 "lh %[tmp1], 0(%[pcoefTable_ifft]) "
245 "\n\t"
246 "lh %[tmp2], 2(%[pcoefTable_ifft]) "
247 "\n\t"
248 "lh %[tmp_re], 0(%[pefw]) "
249 "\n\t"
250 "lh %[tmp_im], 2(%[pefw]) "
251 "\n\t"
252 "addu %[pfft], %[fft], %[tmp2] "
253 "\n\t"
254 "sh %[tmp_re], 0(%[pfft]) "
255 "\n\t"
256 "sh %[tmp_im], 2(%[pfft]) "
257 "\n\t"
258 "addu %[pfft], %[fft], %[tmp1] "
259 "\n\t"
260 "sh %[tmp_re], 0(%[pfft]) "
261 "\n\t"
262 "subu %[tmp_im], $zero, %[tmp_im] "
263 "\n\t"
264 "sh %[tmp_im], 2(%[pfft]) "
265 "\n\t"
266 "lh %[tmp1], 4(%[pcoefTable_ifft]) "
267 "\n\t"
268 "lh %[tmp2], 6(%[pcoefTable_ifft]) "
269 "\n\t"
270 "lh %[tmp_re], 4(%[pefw]) "
271 "\n\t"
272 "lh %[tmp_im], 6(%[pefw]) "
273 "\n\t"
274 "addu %[pfft], %[fft], %[tmp2] "
275 "\n\t"
276 "sh %[tmp_re], 0(%[pfft]) "
277 "\n\t"
278 "sh %[tmp_im], 2(%[pfft]) "
279 "\n\t"
280 "addu %[pfft], %[fft], %[tmp1] "
281 "\n\t"
282 "sh %[tmp_re], 0(%[pfft]) "
283 "\n\t"
284 "subu %[tmp_im], $zero, %[tmp_im] "
285 "\n\t"
286 "sh %[tmp_im], 2(%[pfft]) "
287 "\n\t"
288 "lh %[tmp1], 8(%[pcoefTable_ifft]) "
289 "\n\t"
290 "lh %[tmp2], 10(%[pcoefTable_ifft]) "
291 "\n\t"
292 "lh %[tmp_re], 8(%[pefw]) "
293 "\n\t"
294 "lh %[tmp_im], 10(%[pefw]) "
295 "\n\t"
296 "addu %[pfft], %[fft], %[tmp2] "
297 "\n\t"
298 "sh %[tmp_re], 0(%[pfft]) "
299 "\n\t"
300 "sh %[tmp_im], 2(%[pfft]) "
301 "\n\t"
302 "addu %[pfft], %[fft], %[tmp1] "
303 "\n\t"
304 "sh %[tmp_re], 0(%[pfft]) "
305 "\n\t"
306 "subu %[tmp_im], $zero, %[tmp_im] "
307 "\n\t"
308 "sh %[tmp_im], 2(%[pfft]) "
309 "\n\t"
310 "lh %[tmp1], 12(%[pcoefTable_ifft]) "
311 "\n\t"
312 "lh %[tmp2], 14(%[pcoefTable_ifft]) "
313 "\n\t"
314 "lh %[tmp_re], 12(%[pefw]) "
315 "\n\t"
316 "lh %[tmp_im], 14(%[pefw]) "
317 "\n\t"
318 "addu %[pfft], %[fft], %[tmp2] "
319 "\n\t"
320 "sh %[tmp_re], 0(%[pfft]) "
321 "\n\t"
322 "sh %[tmp_im], 2(%[pfft]) "
323 "\n\t"
324 "addu %[pfft], %[fft], %[tmp1] "
325 "\n\t"
326 "sh %[tmp_re], 0(%[pfft]) "
327 "\n\t"
328 "subu %[tmp_im], $zero, %[tmp_im] "
329 "\n\t"
330 "sh %[tmp_im], 2(%[pfft]) "
331 "\n\t"
332 "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 "
333 "\n\t"
334 "addiu %[i], %[i], -4 "
335 "\n\t"
336 "bgtz %[i], 1b "
337 "\n\t"
338 " addiu %[pefw], %[pefw], 16 "
339 "\n\t"
340 ".set pop "
341 "\n\t"
342 : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), [i] "=&r"(i),
343 [tmp_re] "=&r"(tmp_re), [tmp_im] "=&r"(tmp_im), [pefw] "+r"(pefw),
344 [pcoefTable_ifft] "+r"(pcoefTable_ifft), [fft] "+r"(fft)
345 :
346 : "memory");
347
// Step 2: bin PART_LEN is written separately into the second complex slot
// (int16_t indices 2 and 3), again with the imaginary part negated.
348 fft[2] = efw[PART_LEN].real;
349 fft[3] = -efw[PART_LEN].imag;
350
// Step 3: inverse FFT; the return value is kept and used below to derive the
// output shift.
351 outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
352 pfft = fft;
353
// Step 4: pack in place. 128 values are read from every second int16_t of
// fft (byte offsets 0, 4, 8, 12 per iteration -- the "real" positions of the
// interleaved layout) and written contiguously from the start of fft.
354 __asm __volatile(
355 ".set push \n\t"
356 ".set noreorder \n\t"
357 "addiu %[i], $zero, 128 \n\t"
358 "1: \n\t"
359 "lh %[tmp1], 0(%[ppfft]) \n\t"
360 "lh %[tmp2], 4(%[ppfft]) \n\t"
361 "lh %[tmp3], 8(%[ppfft]) \n\t"
362 "lh %[tmp4], 12(%[ppfft]) \n\t"
363 "addiu %[i], %[i], -4 \n\t"
364 "sh %[tmp1], 0(%[pfft]) \n\t"
365 "sh %[tmp2], 2(%[pfft]) \n\t"
366 "sh %[tmp3], 4(%[pfft]) \n\t"
367 "sh %[tmp4], 6(%[pfft]) \n\t"
368 "addiu %[ppfft], %[ppfft], 16 \n\t"
369 "bgtz %[i], 1b \n\t"
370 " addiu %[pfft], %[pfft], 8 \n\t"
371 ".set pop \n\t"
372 : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), [i] "=&r"(i),
373 [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), [ppfft] "+r"(ppfft)
374 :
375 : "memory");
376
377 pfft = fft;
// Shift applied to every windowed sample: left by out_aecm when it is >= 0,
// arithmetic right by -out_aecm otherwise.
378 out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);
379
// Step 5: window and overlap-add, two samples per iteration (32 iterations).
// For sample k in the first half:
//   v = ((fft[k] * window[k] + 8192) >> 14) shifted by out_aecm
//   fft[k] = output[k] = saturate_int16(v + outBuf[k])
// For the matching sample in the second half:
//   w = ((fft[k + 64] * window[64 - k]) >> 14) shifted by out_aecm
//   outBuf[k] = saturate_int16(w)          (no rounding term here)
380 __asm __volatile(
381 ".set push "
382 "\n\t"
383 ".set noreorder "
384 "\n\t"
385 "addiu %[i], $zero, 64 "
386 "\n\t"
387 "11: "
388 "\n\t"
389 "lh %[tmp1], 0(%[pfft]) "
390 "\n\t"
391 "lh %[tmp2], 0(%[p_kSqrtHanning]) "
392 "\n\t"
393 "addiu %[i], %[i], -2 "
394 "\n\t"
395 "mul %[tmp1], %[tmp1], %[tmp2] "
396 "\n\t"
397 "lh %[tmp3], 2(%[pfft]) "
398 "\n\t"
399 "lh %[tmp4], 2(%[p_kSqrtHanning]) "
400 "\n\t"
401 "mul %[tmp3], %[tmp3], %[tmp4] "
402 "\n\t"
// Round to nearest before dropping the 14 fractional bits.
403 "addiu %[tmp1], %[tmp1], 8192 "
404 "\n\t"
405 "sra %[tmp1], %[tmp1], 14 "
406 "\n\t"
407 "addiu %[tmp3], %[tmp3], 8192 "
408 "\n\t"
409 "sra %[tmp3], %[tmp3], 14 "
410 "\n\t"
411 "bgez %[out_aecm], 1f "
412 "\n\t"
413 " negu %[tmp2], %[out_aecm] "
414 "\n\t"
415 "srav %[tmp1], %[tmp1], %[tmp2] "
416 "\n\t"
417 "b 2f "
418 "\n\t"
419 " srav %[tmp3], %[tmp3], %[tmp2] "
420 "\n\t"
421 "1: "
422 "\n\t"
423 "sllv %[tmp1], %[tmp1], %[out_aecm] "
424 "\n\t"
425 "sllv %[tmp3], %[tmp3], %[out_aecm] "
426 "\n\t"
427 "2: "
428 "\n\t"
// Overlap-add with the tail saved in aecm->outBuf by the previous call.
429 "lh %[tmp4], 0(%[paecm_buf]) "
430 "\n\t"
431 "lh %[tmp2], 2(%[paecm_buf]) "
432 "\n\t"
433 "addu %[tmp3], %[tmp3], %[tmp2] "
434 "\n\t"
435 "addu %[tmp1], %[tmp1], %[tmp4] "
436 "\n\t"
// Saturate both sums to the int16_t range. The DSP variant uses a saturating
// left shift by 16 followed by an arithmetic right shift by 16; the generic
// variant replaces the value by 0x7fff XOR sign-mask whenever bits 31..15 are
// not all equal.
437 #if defined(MIPS_DSP_R1_LE)
438 "shll_s.w %[tmp1], %[tmp1], 16 "
439 "\n\t"
440 "sra %[tmp1], %[tmp1], 16 "
441 "\n\t"
442 "shll_s.w %[tmp3], %[tmp3], 16 "
443 "\n\t"
444 "sra %[tmp3], %[tmp3], 16 "
445 "\n\t"
446 #else // #if defined(MIPS_DSP_R1_LE)
447 "sra %[tmp4], %[tmp1], 31 "
448 "\n\t"
449 "sra %[tmp2], %[tmp1], 15 "
450 "\n\t"
451 "beq %[tmp4], %[tmp2], 3f "
452 "\n\t"
453 " ori %[tmp2], $zero, 0x7fff "
454 "\n\t"
455 "xor %[tmp1], %[tmp2], %[tmp4] "
456 "\n\t"
457 "3: "
458 "\n\t"
459 "sra %[tmp2], %[tmp3], 31 "
460 "\n\t"
461 "sra %[tmp4], %[tmp3], 15 "
462 "\n\t"
463 "beq %[tmp2], %[tmp4], 4f "
464 "\n\t"
465 " ori %[tmp4], $zero, 0x7fff "
466 "\n\t"
467 "xor %[tmp3], %[tmp4], %[tmp2] "
468 "\n\t"
469 "4: "
470 "\n\t"
471 #endif // #if defined(MIPS_DSP_R1_LE)
472 "sh %[tmp1], 0(%[pfft]) "
473 "\n\t"
474 "sh %[tmp1], 0(%[output1]) "
475 "\n\t"
476 "sh %[tmp3], 2(%[pfft]) "
477 "\n\t"
478 "sh %[tmp3], 2(%[output1]) "
479 "\n\t"
// Second half of the block: 128 bytes (64 samples) further into fft, with the
// window read backwards.
480 "lh %[tmp1], 128(%[pfft]) "
481 "\n\t"
482 "lh %[tmp2], 0(%[pp_kSqrtHanning]) "
483 "\n\t"
484 "mul %[tmp1], %[tmp1], %[tmp2] "
485 "\n\t"
486 "lh %[tmp3], 130(%[pfft]) "
487 "\n\t"
488 "lh %[tmp4], -2(%[pp_kSqrtHanning]) "
489 "\n\t"
490 "mul %[tmp3], %[tmp3], %[tmp4] "
491 "\n\t"
492 "sra %[tmp1], %[tmp1], 14 "
493 "\n\t"
494 "sra %[tmp3], %[tmp3], 14 "
495 "\n\t"
496 "bgez %[out_aecm], 5f "
497 "\n\t"
498 " negu %[tmp2], %[out_aecm] "
499 "\n\t"
500 "srav %[tmp3], %[tmp3], %[tmp2] "
501 "\n\t"
502 "b 6f "
503 "\n\t"
504 " srav %[tmp1], %[tmp1], %[tmp2] "
505 "\n\t"
506 "5: "
507 "\n\t"
508 "sllv %[tmp1], %[tmp1], %[out_aecm] "
509 "\n\t"
510 "sllv %[tmp3], %[tmp3], %[out_aecm] "
511 "\n\t"
512 "6: "
513 "\n\t"
514 #if defined(MIPS_DSP_R1_LE)
515 "shll_s.w %[tmp1], %[tmp1], 16 "
516 "\n\t"
517 "sra %[tmp1], %[tmp1], 16 "
518 "\n\t"
519 "shll_s.w %[tmp3], %[tmp3], 16 "
520 "\n\t"
521 "sra %[tmp3], %[tmp3], 16 "
522 "\n\t"
523 #else // #if defined(MIPS_DSP_R1_LE)
524 "sra %[tmp4], %[tmp1], 31 "
525 "\n\t"
526 "sra %[tmp2], %[tmp1], 15 "
527 "\n\t"
528 "beq %[tmp4], %[tmp2], 7f "
529 "\n\t"
530 " ori %[tmp2], $zero, 0x7fff "
531 "\n\t"
532 "xor %[tmp1], %[tmp2], %[tmp4] "
533 "\n\t"
534 "7: "
535 "\n\t"
536 "sra %[tmp2], %[tmp3], 31 "
537 "\n\t"
538 "sra %[tmp4], %[tmp3], 15 "
539 "\n\t"
540 "beq %[tmp2], %[tmp4], 8f "
541 "\n\t"
542 " ori %[tmp4], $zero, 0x7fff "
543 "\n\t"
544 "xor %[tmp3], %[tmp4], %[tmp2] "
545 "\n\t"
546 "8: "
547 "\n\t"
548 #endif // #if defined(MIPS_DSP_R1_LE)
// Save the windowed second half as the overlap for the next call.
549 "sh %[tmp1], 0(%[paecm_buf]) "
550 "\n\t"
551 "sh %[tmp3], 2(%[paecm_buf]) "
552 "\n\t"
553 "addiu %[output1], %[output1], 4 "
554 "\n\t"
555 "addiu %[paecm_buf], %[paecm_buf], 4 "
556 "\n\t"
557 "addiu %[pfft], %[pfft], 4 "
558 "\n\t"
559 "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 "
560 "\n\t"
561 "bgtz %[i], 11b "
562 "\n\t"
563 " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 "
564 "\n\t"
565 ".set pop "
566 "\n\t"
567 : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft),
568 [output1] "+r"(output1), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4),
569 [paecm_buf] "+r"(paecm_buf), [i] "=&r"(i),
570 [pp_kSqrtHanning] "+r"(pp_kSqrtHanning),
571 [p_kSqrtHanning] "+r"(p_kSqrtHanning)
// The WebRtcAecm_kSqrtHanning input operand is not referenced by the asm
// template above; the table is reached through the two pointer operands.
572 : [out_aecm] "r"(out_aecm),
573 [WebRtcAecm_kSqrtHanning] "r"(WebRtcAecm_kSqrtHanning)
574 : "hi", "lo", "memory");
575
576 // Copy the current block to the old position
577 // (aecm->outBuf is shifted elsewhere)
578 memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
579 memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN,
580 sizeof(int16_t) * PART_LEN);
581 if (nearendClean != NULL) {
582 memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN,
583 sizeof(int16_t) * PART_LEN);
584 }
585 }
586
// Computes the echo estimate per bin and accumulates three energy sums.
//
// For every bin k in 0..PART_LEN:
//   echo_est[k]         = channelStored[k] * far_spectrum[k]
//   *far_energy        += far_spectrum[k]
//   *echo_energy_adapt += channelAdapt16[k] * far_spectrum[k]
//   *echo_energy_stored += echo_est[k]
// The three sums start from the values the caller passes in; they are not
// reset here.
//
// aecm                [in]     channelStored and channelAdapt16 are read.
// far_spectrum        [in]     PART_LEN + 1 values are read.
// echo_est            [out]    PART_LEN + 1 values are written.
// far_energy          [in/out] See above.
// echo_energy_adapt   [in/out] See above.
// echo_energy_stored  [in/out] See above.
WebRtcAecm_CalcLinearEnergies_mips(AecmCore * aecm,const uint16_t * far_spectrum,int32_t * echo_est,uint32_t * far_energy,uint32_t * echo_energy_adapt,uint32_t * echo_energy_stored)587 void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
588 const uint16_t* far_spectrum,
589 int32_t* echo_est,
590 uint32_t* far_energy,
591 uint32_t* echo_energy_adapt,
592 uint32_t* echo_energy_stored) {
593 int i;
594 uint32_t par1 = (*far_energy);
595 uint32_t par2 = (*echo_energy_adapt);
596 uint32_t par3 = (*echo_energy_stored);
597 int16_t* ch_stored_p = &(aecm->channelStored[0]);
598 int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);
599 uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));
600 int32_t* echo_p = &(echo_est[0]);
601 int32_t temp0, stored0, echo0, adept0, spectrum0;
602 int32_t stored1, adept1, spectrum1, echo1, temp1;
603
604 // Get energy for the delayed far end signal and estimated
605 // echo using both stored and adapted channels.
// Each loop iteration handles four bins (two pairs); the pointers are
// advanced inside the asm, and echo_est is written with usw at negative
// offsets after echo_p has already been bumped by 16 bytes.
// NOTE(review): channelAdapt16 is loaded with lhu (zero-extended) here while
// the tail below multiplies it as a signed int16_t; the two agree only if the
// entries are non-negative -- confirm.
606 for (i = 0; i < PART_LEN; i += 4) {
607 __asm __volatile(
608 ".set push \n\t"
609 ".set noreorder \n\t"
610 "lh %[stored0], 0(%[ch_stored_p]) \n\t"
611 "lhu %[adept0], 0(%[ch_adapt_p]) \n\t"
612 "lhu %[spectrum0], 0(%[spectrum_p]) \n\t"
613 "lh %[stored1], 2(%[ch_stored_p]) \n\t"
614 "lhu %[adept1], 2(%[ch_adapt_p]) \n\t"
615 "lhu %[spectrum1], 2(%[spectrum_p]) \n\t"
616 "mul %[echo0], %[stored0], %[spectrum0] \n\t"
617 "mul %[temp0], %[adept0], %[spectrum0] \n\t"
618 "mul %[echo1], %[stored1], %[spectrum1] \n\t"
619 "mul %[temp1], %[adept1], %[spectrum1] \n\t"
620 "addu %[par1], %[par1], %[spectrum0] \n\t"
621 "addu %[par1], %[par1], %[spectrum1] \n\t"
622 "addiu %[echo_p], %[echo_p], 16 \n\t"
623 "addu %[par3], %[par3], %[echo0] \n\t"
624 "addu %[par2], %[par2], %[temp0] \n\t"
625 "addu %[par3], %[par3], %[echo1] \n\t"
626 "addu %[par2], %[par2], %[temp1] \n\t"
627 "usw %[echo0], -16(%[echo_p]) \n\t"
628 "usw %[echo1], -12(%[echo_p]) \n\t"
629 "lh %[stored0], 4(%[ch_stored_p]) \n\t"
630 "lhu %[adept0], 4(%[ch_adapt_p]) \n\t"
631 "lhu %[spectrum0], 4(%[spectrum_p]) \n\t"
632 "lh %[stored1], 6(%[ch_stored_p]) \n\t"
633 "lhu %[adept1], 6(%[ch_adapt_p]) \n\t"
634 "lhu %[spectrum1], 6(%[spectrum_p]) \n\t"
635 "mul %[echo0], %[stored0], %[spectrum0] \n\t"
636 "mul %[temp0], %[adept0], %[spectrum0] \n\t"
637 "mul %[echo1], %[stored1], %[spectrum1] \n\t"
638 "mul %[temp1], %[adept1], %[spectrum1] \n\t"
639 "addu %[par1], %[par1], %[spectrum0] \n\t"
640 "addu %[par1], %[par1], %[spectrum1] \n\t"
641 "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t"
642 "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t"
643 "addiu %[spectrum_p], %[spectrum_p], 8 \n\t"
644 "addu %[par3], %[par3], %[echo0] \n\t"
645 "addu %[par2], %[par2], %[temp0] \n\t"
646 "addu %[par3], %[par3], %[echo1] \n\t"
647 "addu %[par2], %[par2], %[temp1] \n\t"
648 "usw %[echo0], -8(%[echo_p]) \n\t"
649 "usw %[echo1], -4(%[echo_p]) \n\t"
650 ".set pop \n\t"
651 : [temp0] "=&r"(temp0), [stored0] "=&r"(stored0),
652 [adept0] "=&r"(adept0), [spectrum0] "=&r"(spectrum0),
653 [echo0] "=&r"(echo0), [echo_p] "+r"(echo_p), [par3] "+r"(par3),
654 [par1] "+r"(par1), [par2] "+r"(par2), [stored1] "=&r"(stored1),
655 [adept1] "=&r"(adept1), [echo1] "=&r"(echo1),
656 [spectrum1] "=&r"(spectrum1), [temp1] "=&r"(temp1),
657 [ch_stored_p] "+r"(ch_stored_p), [ch_adapt_p] "+r"(ch_adapt_p),
658 [spectrum_p] "+r"(spectrum_p)
659 :
660 : "hi", "lo", "memory");
661 }
662
// Tail: the last bin (index PART_LEN) is handled in plain C.
663 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
664 far_spectrum[PART_LEN]);
665 par1 += (uint32_t)(far_spectrum[PART_LEN]);
666 par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
667 par3 += (uint32_t)echo_est[PART_LEN];
668
669 (*far_energy) = par1;
670 (*echo_energy_adapt) = par2;
671 (*echo_energy_stored) = par3;
672 }
673
674 #if defined(MIPS_DSP_R1_LE)
// Copies the adaptive channel (channelAdapt16) into the stored channel
// (channelStored, PART_LEN1 entries) and recomputes echo_est from the stored
// channel and far_spectrum. Only compiled when MIPS_DSP_R1_LE is defined.
//
// aecm          [in/out] channelAdapt16 is read, channelStored is overwritten.
// far_spectrum  [in]     Read for every bin up to and including the loop's
//                        exit index.
// echo_est      [out]    One value per bin is written.
WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore * aecm,const uint16_t * far_spectrum,int32_t * echo_est)675 void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
676 const uint16_t* far_spectrum,
677 int32_t* echo_est) {
678 int i;
679 int16_t* temp1;
680 uint16_t* temp8;
681 int32_t temp0, temp2, temp3, temp4, temp5, temp6;
682 int32_t* temp7 = &(echo_est[0]);
683 temp1 = &(aecm->channelStored[0]);
684 temp8 = (uint16_t*)(&far_spectrum[0]);
685
686 // During startup we store the channel every block.
687 memcpy(aecm->channelStored, aecm->channelAdapt16,
688 sizeof(int16_t) * PART_LEN1);
689 // Recalculate echo estimate
// Four bins per iteration. Each ulw fetches two adjacent halfwords; the
// ".phl" product (upper halfwords) is stored one word after the ".phr"
// product (lower halfwords), see the -12/-16 and -4/-8 store offsets.
// NOTE(review): per the MIPS DSP ASE, muleq_s.w.ph{l,r} produce the doubled
// product of signed halfwords, which the following "sra ..., 1" halves again.
// That treats far_spectrum as signed, unlike WEBRTC_SPL_MUL_16_U16 in the
// tail -- confirm the spectrum values stay below 0x8000.
690 for (i = 0; i < PART_LEN; i += 4) {
691 __asm __volatile(
692 "ulw %[temp0], 0(%[temp8]) \n\t"
693 "ulw %[temp2], 0(%[temp1]) \n\t"
694 "ulw %[temp4], 4(%[temp8]) \n\t"
695 "ulw %[temp5], 4(%[temp1]) \n\t"
696 "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t"
697 "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t"
698 "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t"
699 "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t"
700 "addiu %[temp7], %[temp7], 16 \n\t"
701 "addiu %[temp1], %[temp1], 8 \n\t"
702 "addiu %[temp8], %[temp8], 8 \n\t"
703 "sra %[temp3], %[temp3], 1 \n\t"
704 "sra %[temp0], %[temp0], 1 \n\t"
705 "sra %[temp6], %[temp6], 1 \n\t"
706 "sra %[temp4], %[temp4], 1 \n\t"
707 "usw %[temp3], -12(%[temp7]) \n\t"
708 "usw %[temp0], -16(%[temp7]) \n\t"
709 "usw %[temp6], -4(%[temp7]) \n\t"
710 "usw %[temp4], -8(%[temp7]) \n\t"
711 : [temp0] "=&r"(temp0), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
712 [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6),
713 [temp1] "+r"(temp1), [temp8] "+r"(temp8), [temp7] "+r"(temp7)
714 :
715 : "hi", "lo", "memory");
716 }
// Tail: i holds the loop's exit value here (PART_LEN when PART_LEN is a
// multiple of 4), so this fills the one remaining bin in plain C.
717 echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
718 }
719
// Resets the adaptive channel from the stored one: channelStored is copied
// into channelAdapt16 (PART_LEN1 entries) and channelAdapt32 is rebuilt from
// channelStored. Only compiled when MIPS_DSP_R1_LE is defined.
//
// aecm  [in/out] channelStored is read; channelAdapt16 and channelAdapt32 are
//                overwritten.
WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore * aecm)720 void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) {
721 int i;
722 int32_t* temp3;
723 int16_t* temp0;
724 int32_t temp1, temp2, temp4, temp5;
725
726 temp0 = &(aecm->channelStored[0]);
727 temp3 = &(aecm->channelAdapt32[0]);
728
729 // The stored channel has a significantly lower MSE than the adaptive one for
730 // two consecutive calculations. Reset the adaptive channel.
731 memcpy(aecm->channelAdapt16, aecm->channelStored,
732 sizeof(int16_t) * PART_LEN1);
733
734 // Restore the W32 channel
// Four entries per iteration: each ulw fetches two halfwords, which are then
// widened to two words and stored with the ".phr" result first.
// NOTE(review): per the MIPS DSP ASE, preceq.w.ph{l,r} place the selected
// halfword in the upper 16 bits of the result, which would match the
// "<< 16" used for the last entry below -- confirm.
735 for (i = 0; i < PART_LEN; i += 4) {
736 __asm __volatile(
737 "ulw %[temp1], 0(%[temp0]) \n\t"
738 "ulw %[temp4], 4(%[temp0]) \n\t"
739 "preceq.w.phl %[temp2], %[temp1] \n\t"
740 "preceq.w.phr %[temp1], %[temp1] \n\t"
741 "preceq.w.phl %[temp5], %[temp4] \n\t"
742 "preceq.w.phr %[temp4], %[temp4] \n\t"
743 "addiu %[temp0], %[temp0], 8 \n\t"
744 "usw %[temp2], 4(%[temp3]) \n\t"
745 "usw %[temp1], 0(%[temp3]) \n\t"
746 "usw %[temp5], 12(%[temp3]) \n\t"
747 "usw %[temp4], 8(%[temp3]) \n\t"
748 "addiu %[temp3], %[temp3], 16 \n\t"
749 : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp4] "=&r"(temp4),
750 [temp5] "=&r"(temp5), [temp3] "+r"(temp3), [temp0] "+r"(temp0)
751 :
752 : "memory");
753 }
754
// Tail: i holds the loop's exit value here (PART_LEN when PART_LEN is a
// multiple of 4), so this converts the one remaining entry in plain C.
755 aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
756 }
757 #endif // #if defined(MIPS_DSP_R1_LE)
758
759 // Transforms a time domain signal into the frequency domain, outputting the
760 // complex valued signal, absolute value and sum of absolute values.
761 //
// aecm [in] Passed on to WindowAndFFT
762 // time_signal [in] Pointer to time domain signal
763 // freq_signal [out] Pointer to the complex frequency domain array
764 // (real part in .real, imaginary part in .imag;
765 // entries 0..PART_LEN are written)
766 // freq_signal_abs [out] Pointer to absolute value of frequency domain
767 // array
768 // freq_signal_sum_abs [out] Pointer to the sum of all absolute values in
769 // the frequency domain array
770 // return value The Q-domain of current frequency values
// (time_signal_scaling: 0 unless AECM_DYNAMIC_Q is
// defined, in which case it is WebRtcSpl_NormW16 of
// the largest absolute sample)
771 //
TimeToFrequencyDomain(AecmCore * aecm,const int16_t * time_signal,ComplexInt16 * freq_signal,uint16_t * freq_signal_abs,uint32_t * freq_signal_sum_abs)772 static int TimeToFrequencyDomain(AecmCore* aecm,
773 const int16_t* time_signal,
774 ComplexInt16* freq_signal,
775 uint16_t* freq_signal_abs,
776 uint32_t* freq_signal_sum_abs) {
777 int i = 0;
778 int time_signal_scaling = 0;
779
780 // In fft_buf, +16 for 32-byte alignment.
781 int16_t fft_buf[PART_LEN4 + 16];
// Round the buffer address up to the next multiple of 32.
782 int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31);
783
784 int16_t tmp16no1;
785 #if !defined(MIPS_DSP_R2_LE)
786 int32_t tmp32no1;
787 int32_t tmp32no2;
788 int16_t tmp16no2;
789 #else
790 int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;
791 int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;
792 int16_t* freqp;
793 uint16_t* freqabsp;
794 uint32_t freqt0, freqt1, freqt2, freqt3;
795 uint32_t freqs;
796 #endif
797
798 #ifdef AECM_DYNAMIC_Q
799 tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
800 time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
801 #endif
802
803 WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
804
805 // Extract imaginary and real part,
806 // calculate the magnitude for all frequency bins
// Bins 0 and PART_LEN get a zero imaginary part, so their magnitude is just
// the absolute value of the real part. Bin PART_LEN's real part is taken
// from the FFT work buffer.
807 freq_signal[0].imag = 0;
808 freq_signal[PART_LEN].imag = 0;
809 freq_signal[PART_LEN].real = fft[PART_LEN2];
810 freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
811 freq_signal_abs[PART_LEN] =
812 (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[PART_LEN].real);
813 (*freq_signal_sum_abs) =
814 (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]);
815
// Generic path: bins 1..PART_LEN-1 in plain C.
816 #if !defined(MIPS_DSP_R2_LE)
817 for (i = 1; i < PART_LEN; i++) {
818 if (freq_signal[i].real == 0) {
819 freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
820 } else if (freq_signal[i].imag == 0) {
821 freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
822 } else {
823 // Magnitude of the complex fft output:
824 // magn = sqrt(real^2 + imag^2)
825 // computed as WebRtcSpl_SqrtFloor of the saturating sum of the
826 // squared absolute values (no alpha/beta approximation is used
827 // here).
828 tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
829 tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
830 tmp32no1 = tmp16no1 * tmp16no1;
831 tmp32no2 = tmp16no2 * tmp16no2;
832 tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
833 tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
834
835 freq_signal_abs[i] = (uint16_t)tmp32no1;
836 }
837 (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
838 }
839 #else // #if !defined(MIPS_DSP_R2_LE)
// DSP R2 path: the running sum is kept in freqs and written back at the end.
840 freqs =
841 (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]);
842 freqp = &(freq_signal[1].real);
843
// Bins 1..3: each loaded word holds one complex bin as two halfwords.
// NOTE(review): per the MIPS DSP ASE, "mult $acN, $zero, $zero" clears the
// accumulator, dpaq_s.w.ph adds the doubled sum of the two halfword squares,
// and "extr.w ..., 1" extracts it shifted right by one, i.e. re^2 + im^2 --
// confirm.
844 __asm __volatile(
845 "lw %[freqt0], 0(%[freqp]) \n\t"
846 "lw %[freqt1], 4(%[freqp]) \n\t"
847 "lw %[freqt2], 8(%[freqp]) \n\t"
848 "mult $ac0, $zero, $zero \n\t"
849 "mult $ac1, $zero, $zero \n\t"
850 "mult $ac2, $zero, $zero \n\t"
851 "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
852 "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
853 "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
854 "addiu %[freqp], %[freqp], 12 \n\t"
855 "extr.w %[tmp32no20], $ac0, 1 \n\t"
856 "extr.w %[tmp32no21], $ac1, 1 \n\t"
857 "extr.w %[tmp32no22], $ac2, 1 \n\t"
858 : [freqt0] "=&r"(freqt0), [freqt1] "=&r"(freqt1), [freqt2] "=&r"(freqt2),
859 [freqp] "+r"(freqp), [tmp32no20] "=r"(tmp32no20),
860 [tmp32no21] "=r"(tmp32no21), [tmp32no22] "=r"(tmp32no22)
861 :
862 : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo");
863
864 tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
865 tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
866 tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
867 freq_signal_abs[1] = (uint16_t)tmp32no10;
868 freq_signal_abs[2] = (uint16_t)tmp32no11;
869 freq_signal_abs[3] = (uint16_t)tmp32no12;
870 freqs += (uint32_t)tmp32no10;
871 freqs += (uint32_t)tmp32no11;
872 freqs += (uint32_t)tmp32no12;
873 freqabsp = &(freq_signal_abs[4]);
// Bins 4..PART_LEN-1, four per iteration. The first asm block computes the
// four squared magnitudes and advances freqp and freqabsp; the second one
// stores the four square roots at negative offsets from the already advanced
// freqabsp and adds them to freqs.
874 for (i = 4; i < PART_LEN; i += 4) {
875 __asm __volatile(
876 "ulw %[freqt0], 0(%[freqp]) \n\t"
877 "ulw %[freqt1], 4(%[freqp]) \n\t"
878 "ulw %[freqt2], 8(%[freqp]) \n\t"
879 "ulw %[freqt3], 12(%[freqp]) \n\t"
880 "mult $ac0, $zero, $zero \n\t"
881 "mult $ac1, $zero, $zero \n\t"
882 "mult $ac2, $zero, $zero \n\t"
883 "mult $ac3, $zero, $zero \n\t"
884 "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
885 "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
886 "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
887 "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t"
888 "addiu %[freqp], %[freqp], 16 \n\t"
889 "addiu %[freqabsp], %[freqabsp], 8 \n\t"
890 "extr.w %[tmp32no20], $ac0, 1 \n\t"
891 "extr.w %[tmp32no21], $ac1, 1 \n\t"
892 "extr.w %[tmp32no22], $ac2, 1 \n\t"
893 "extr.w %[tmp32no23], $ac3, 1 \n\t"
894 : [freqt0] "=&r"(freqt0), [freqt1] "=&r"(freqt1),
895 [freqt2] "=&r"(freqt2), [freqt3] "=&r"(freqt3),
896 [tmp32no20] "=r"(tmp32no20), [tmp32no21] "=r"(tmp32no21),
897 [tmp32no22] "=r"(tmp32no22), [tmp32no23] "=r"(tmp32no23),
898 [freqabsp] "+r"(freqabsp), [freqp] "+r"(freqp)
899 :
900 : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
901 "$ac3hi", "$ac3lo");
902
903 tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
904 tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
905 tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
906 tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);
907
908 __asm __volatile(
909 "sh %[tmp32no10], -8(%[freqabsp]) \n\t"
910 "sh %[tmp32no11], -6(%[freqabsp]) \n\t"
911 "sh %[tmp32no12], -4(%[freqabsp]) \n\t"
912 "sh %[tmp32no13], -2(%[freqabsp]) \n\t"
913 "addu %[freqs], %[freqs], %[tmp32no10] \n\t"
914 "addu %[freqs], %[freqs], %[tmp32no11] \n\t"
915 "addu %[freqs], %[freqs], %[tmp32no12] \n\t"
916 "addu %[freqs], %[freqs], %[tmp32no13] \n\t"
917 : [freqs] "+r"(freqs)
918 : [tmp32no10] "r"(tmp32no10), [tmp32no11] "r"(tmp32no11),
919 [tmp32no12] "r"(tmp32no12), [tmp32no13] "r"(tmp32no13),
920 [freqabsp] "r"(freqabsp)
921 : "memory");
922 }
923
924 (*freq_signal_sum_abs) = freqs;
925 #endif
926
927 return time_signal_scaling;
928 }
929
// Runs the echo-control pipeline on one block of input and hands the result
// to InverseFFTAndWindow().
//
// Steps, in the order they appear below:
//   1. Update aecm->startupState from aecm->totCount.
//   2. Copy PART_LEN samples of each input to offset PART_LEN of the matching
//      aecm buffer (xBuf, dBufNoisy, dBufClean).
//   3. Transform far end, noisy near end and (if given) clean near end with
//      TimeToFrequencyDomain().
//   4. Update the far-end history, estimate the delay and fetch the aligned
//      far-end spectrum.
//   5. Update energies, step size and the channel estimate.
//   6. Compute the per-bin gain hnl[] (Q14 according to the comments below),
//      optionally post-process it, and apply it to dfw to obtain efw.
//   7. Optionally add comfort noise, then call InverseFFTAndWindow().
//
// Parameters:
//   aecm          - core state; read and modified throughout.
//   farend        - far-end input; PART_LEN samples are read.
//   nearendNoisy  - near-end input; PART_LEN samples are read.
//   nearendClean  - optional second near-end input. When NULL, the noisy
//                   spectrum and its Q-domain are reused in its place.
//   output        - forwarded unchanged to InverseFFTAndWindow().
//
// Returns 0 on success and -1 when WebRtc_AddFarSpectrumFix() or
// WebRtc_DelayEstimatorProcessFix() returns -1, or when
// WebRtcAecm_AlignedFarend() returns NULL.
WebRtcAecm_ProcessBlock(AecmCore * aecm,const int16_t * farend,const int16_t * nearendNoisy,const int16_t * nearendClean,int16_t * output)930 int WebRtcAecm_ProcessBlock(AecmCore* aecm,
931 const int16_t* farend,
932 const int16_t* nearendNoisy,
933 const int16_t* nearendClean,
934 int16_t* output) {
935 int i;
936 uint32_t xfaSum;
937 uint32_t dfaNoisySum;
938 uint32_t dfaCleanSum;
939 uint32_t echoEst32Gained;
940 uint32_t tmpU32;
941 int32_t tmp32no1;
942
// Magnitude spectra filled in by TimeToFrequencyDomain() (4th argument).
943 uint16_t xfa[PART_LEN1];
944 uint16_t dfaNoisy[PART_LEN1];
945 uint16_t dfaClean[PART_LEN1];
// Points at dfaClean by default; redirected to dfaNoisy below when no clean
// near-end signal is supplied.
946 uint16_t* ptrDfaClean = dfaClean;
947 const uint16_t* far_spectrum_ptr = NULL;
948
949 // 32 byte aligned buffers (with +8 or +16).
950 int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
951 int32_t echoEst32_buf[PART_LEN1 + 8];
952 int32_t dfw_buf[PART_LEN2 + 8];
953 int32_t efw_buf[PART_LEN2 + 8];
954
// Round the address of each raw buffer up to the next multiple of 32.
// NOTE(review): the address goes through uint32_t, which assumes pointers fit
// in 32 bits - confirm this file is only built for 32-bit targets.
955 int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~31);
956 int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~31);
957 ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31);
958 ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31);
959
960 int16_t hnl[PART_LEN1];
// Counts the bins whose hnl value ended up strictly positive.
961 int16_t numPosCoef = 0;
962 int delay;
963 int16_t tmp16no1;
964 int16_t tmp16no2;
965 int16_t mu;
966 int16_t supGain;
967 int16_t zeros32, zeros16;
968 int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
969 int far_q;
970 int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
971
// Inclusive bin range averaged in the wideband branch (aecm->mult == 2).
972 const int kMinPrefBand = 4;
973 const int kMaxPrefBand = 24;
974 int32_t avgHnl32 = 0;
975
// Scratch registers and running pointers for the inline assembly below.
976 int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
977 int16_t* ptr;
978 int16_t* ptr1;
979 int16_t* er_ptr;
980 int16_t* dr_ptr;
981
// ptr1 walks hnl[] in the wideband squaring loops; ptr, er_ptr and dr_ptr walk
// hnl[], efw[] and dfw[] in the NLP loop. Each is advanced by the assembly
// itself, so the loops must run front to back exactly once.
982 ptr = &hnl[0];
983 ptr1 = &hnl[0];
984 er_ptr = &efw[0].real;
985 dr_ptr = &dfw[0].real;
986
987 // Determine startup state. There are three states:
988 // (0) the first CONV_LEN blocks
989 // (1) another CONV_LEN blocks
990 // (2) the rest
991
// The sum of the two comparisons yields 0, 1 or 2; once state 2 has been
// reached the value is no longer recomputed.
992 if (aecm->startupState < 2) {
993 aecm->startupState =
994 (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2);
995 }
996 // END: Determine startup state
997
998 // Buffer near and far end signals
999 memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
1000 memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
1001 if (nearendClean != NULL) {
1002 memcpy(aecm->dBufClean + PART_LEN, nearendClean,
1003 sizeof(int16_t) * PART_LEN);
1004 }
1005
1006 // Transform far end signal from time domain to frequency domain.
// dfw is passed as the third argument to every TimeToFrequencyDomain() call
// in this function; presumably it is overwritten each time, so it ends up
// holding the result of the last call made - confirm against that helper.
1007 far_q = TimeToFrequencyDomain(aecm, aecm->xBuf, dfw, xfa, &xfaSum);
1008
1009 // Transform noisy near end signal from time domain to frequency domain.
1010 zerosDBufNoisy =
1011 TimeToFrequencyDomain(aecm, aecm->dBufNoisy, dfw, dfaNoisy, &dfaNoisySum);
1012 aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
1013 aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
1014
1015 if (nearendClean == NULL) {
// No clean signal: alias the clean spectrum and Q-domains to the noisy ones.
1016 ptrDfaClean = dfaNoisy;
1017 aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
1018 aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
1019 dfaCleanSum = dfaNoisySum;
1020 } else {
1021 // Transform clean near end signal from time domain to frequency domain.
1022 zerosDBufClean = TimeToFrequencyDomain(aecm, aecm->dBufClean, dfw, dfaClean,
1023 &dfaCleanSum);
1024 aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
1025 aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
1026 }
1027
1028 // Get the delay
1029 // Save far-end history and estimate delay
1030 WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
1031
1032 if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
1033 far_q) == -1) {
1034 return -1;
1035 }
1036 delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, dfaNoisy,
1037 PART_LEN1, zerosDBufNoisy);
1038 if (delay == -1) {
1039 return -1;
1040 } else if (delay == -2) {
1041 // If the delay is unknown, we assume zero.
1042 // NOTE: this will have to be adjusted if we ever add lookahead.
1043 delay = 0;
1044 }
1045
// A non-negative aecm->fixedDelay overrides the estimate computed above.
1046 if (aecm->fixedDelay >= 0) {
1047 // Use fixed delay
1048 delay = aecm->fixedDelay;
1049 }
1050
1051 // Get aligned far end spectrum
// far_q is passed by address and read back afterwards, so the call may
// replace it.
1052 far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
1053 zerosXBuf = (int16_t)far_q;
1054
1055 if (far_spectrum_ptr == NULL) {
1056 return -1;
1057 }
1058
1059 // Calculate log(energy) and update energy threshold levels
1060 WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum,
1061 echoEst32);
1062 // Calculate stepsize
1063 mu = WebRtcAecm_CalcStepSize(aecm);
1064
1065 // Update counters
1066 aecm->totCount++;
1067
1068 // This is the channel estimation algorithm.
1069 // It is based on NLMS but has a variable step length,
1070 // which was calculated above.
1071 WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu,
1072 echoEst32);
1073
1074 supGain = WebRtcAecm_CalcSuppressionGain(aecm);
1075
1076 // Calculate Wiener filter hnl[]
1077 for (i = 0; i < PART_LEN1; i++) {
1078 // Far end signal through channel estimate in Q8
1079 // How much can we shift right to preserve resolution
// Move echoFilt[i] towards echoEst32[i] by 50/256 of the difference; the
// product is formed in 64 bits before the shift.
1080 tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
1081 aecm->echoFilt[i] +=
1082 rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8);
1083
// Use the headroom of both factors to decide whether echoFilt[i] * supGain
// can be formed directly or one factor has to be shifted down first.
1084 zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
1085 zeros16 = WebRtcSpl_NormW16(supGain) + 1;
1086 if (zeros32 + zeros16 > 16) {
1087 // Multiplication is safe
1088 // Result in
1089 // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
1090 echoEst32Gained =
1091 WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], (uint16_t)supGain);
1092 resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
1093 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
1094 } else {
// tmp16no1 is the number of bits dropped before multiplying; it is added back
// into resolutionDiff so the final shift compensates for it.
1095 tmp16no1 = 17 - zeros32 - zeros16;
1096 resolutionDiff =
1097 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
1098 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
1099 if (zeros32 > tmp16no1) {
1100 echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
1101 supGain >> tmp16no1);
1102 } else {
1103 // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
1104 echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
1105 }
1106 }
1107
// Bring the stored nearFilt[i] (tmp16no1) and the current clean magnitude
// (tmp16no2) to a common Q-domain before smoothing. qDomainDiff records the
// shift that must be undone when the result is stored back.
1108 zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
1109 RTC_DCHECK_GE(zeros16, 0); // `zeros16` is a norm, hence non-negative.
1110 dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
1111 if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
1112 tmp16no1 = aecm->nearFilt[i] << zeros16;
1113 qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
1114 tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
1115 } else {
1116 tmp16no1 = dfa_clean_q_domain_diff < 0
1117 ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
1118 : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
1119 qDomainDiff = 0;
1120 tmp16no2 = ptrDfaClean[i];
1121 }
1122
// Smoothing step: new = old + (current - old) / 16.
1123 tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
1124 tmp16no2 = (int16_t)(tmp32no1 >> 4);
1125 tmp16no2 += tmp16no1;
1126 zeros16 = WebRtcSpl_NormW16(tmp16no2);
// NOTE(review): this is a bitwise AND between tmp16no2 and the 0/1 result of
// the comparison, so the branch is taken only when the comparison holds and
// the lowest bit of tmp16no2 is set. A logical AND may have been intended -
// confirm before changing, since it alters the filter output.
1127 if ((tmp16no2) & (-qDomainDiff > zeros16)) {
1128 aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
1129 } else {
1130 aecm->nearFilt[i] =
1131 qDomainDiff < 0 ? tmp16no2 << -qDomainDiff : tmp16no2 >> qDomainDiff;
1132 }
1133
1134 // Wiener filter coefficients, resulting hnl in Q14
1135 if (echoEst32Gained == 0) {
1136 hnl[i] = ONE_Q14;
1137 numPosCoef++;
1138 } else if (aecm->nearFilt[i] == 0) {
1139 hnl[i] = 0;
1140 } else {
1141 // Multiply the suppression gain
1142 // Rounding
// Adding half the divisor before dividing rounds the quotient to nearest.
1143 echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
1144 tmpU32 =
1145 WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
1146
1147 // Current resolution is
1148 // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
1149 // - max(0, 17 - zeros16 - zeros32))
1150 // Make sure we are in Q14
1151 tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
// A ratio above ONE_Q14 gives gain 0; a negative value after the conversion
// to int32_t gives gain ONE_Q14.
1152 if (tmp32no1 > ONE_Q14) {
1153 hnl[i] = 0;
1154 } else if (tmp32no1 < 0) {
1155 hnl[i] = ONE_Q14;
1156 numPosCoef++;
1157 } else {
1158 // 1-echoEst/dfa
1159 hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
1160 if (hnl[i] <= 0) {
1161 hnl[i] = 0;
1162 } else {
1163 numPosCoef++;
1164 }
1165 }
1166 }
1167 }
1168
1169 // Only in wideband. Prevent the gain in upper band from being larger than
1170 // in lower band.
1171 if (aecm->mult == 2) {
1172 // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
1173 // speech distortion in double-talk.
// Replace every hnl value x with (x * x) >> 14, in place. This first loop
// handles eight values per iteration and advances ptr1 by 16 bytes each time.
1174 for (i = 0; i < (PART_LEN1 >> 3); i++) {
1175 __asm __volatile(
1176 "lh %[temp1], 0(%[ptr1]) \n\t"
1177 "lh %[temp2], 2(%[ptr1]) \n\t"
1178 "lh %[temp3], 4(%[ptr1]) \n\t"
1179 "lh %[temp4], 6(%[ptr1]) \n\t"
1180 "lh %[temp5], 8(%[ptr1]) \n\t"
1181 "lh %[temp6], 10(%[ptr1]) \n\t"
1182 "lh %[temp7], 12(%[ptr1]) \n\t"
1183 "lh %[temp8], 14(%[ptr1]) \n\t"
1184 "mul %[temp1], %[temp1], %[temp1] \n\t"
1185 "mul %[temp2], %[temp2], %[temp2] \n\t"
1186 "mul %[temp3], %[temp3], %[temp3] \n\t"
1187 "mul %[temp4], %[temp4], %[temp4] \n\t"
1188 "mul %[temp5], %[temp5], %[temp5] \n\t"
1189 "mul %[temp6], %[temp6], %[temp6] \n\t"
1190 "mul %[temp7], %[temp7], %[temp7] \n\t"
1191 "mul %[temp8], %[temp8], %[temp8] \n\t"
1192 "sra %[temp1], %[temp1], 14 \n\t"
1193 "sra %[temp2], %[temp2], 14 \n\t"
1194 "sra %[temp3], %[temp3], 14 \n\t"
1195 "sra %[temp4], %[temp4], 14 \n\t"
1196 "sra %[temp5], %[temp5], 14 \n\t"
1197 "sra %[temp6], %[temp6], 14 \n\t"
1198 "sra %[temp7], %[temp7], 14 \n\t"
1199 "sra %[temp8], %[temp8], 14 \n\t"
1200 "sh %[temp1], 0(%[ptr1]) \n\t"
1201 "sh %[temp2], 2(%[ptr1]) \n\t"
1202 "sh %[temp3], 4(%[ptr1]) \n\t"
1203 "sh %[temp4], 6(%[ptr1]) \n\t"
1204 "sh %[temp5], 8(%[ptr1]) \n\t"
1205 "sh %[temp6], 10(%[ptr1]) \n\t"
1206 "sh %[temp7], 12(%[ptr1]) \n\t"
1207 "sh %[temp8], 14(%[ptr1]) \n\t"
1208 "addiu %[ptr1], %[ptr1], 16 \n\t"
1209 : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
1210 [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6),
1211 [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [ptr1] "+r"(ptr1)
1212 :
1213 : "memory", "hi", "lo");
1214 }
// Same operation, one value at a time, for the PART_LEN1 & 7 values that the
// unrolled loop did not reach. ptr1 continues from where that loop stopped.
1215 for (i = 0; i < (PART_LEN1 & 7); i++) {
1216 __asm __volatile(
1217 "lh %[temp1], 0(%[ptr1]) \n\t"
1218 "mul %[temp1], %[temp1], %[temp1] \n\t"
1219 "sra %[temp1], %[temp1], 14 \n\t"
1220 "sh %[temp1], 0(%[ptr1]) \n\t"
1221 "addiu %[ptr1], %[ptr1], 2 \n\t"
1222 : [temp1] "=&r"(temp1), [ptr1] "+r"(ptr1)
1223 :
1224 : "memory", "hi", "lo");
1225 }
1226
// Average the squared gains over bins kMinPrefBand..kMaxPrefBand inclusive.
1227 for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
1228 avgHnl32 += (int32_t)hnl[i];
1229 }
1230
1231 RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0);
1232 avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
1233
// Cap every bin from kMaxPrefBand upwards at that average.
1234 for (i = kMaxPrefBand; i < PART_LEN1; i++) {
1235 if (hnl[i] > (int16_t)avgHnl32) {
1236 hnl[i] = (int16_t)avgHnl32;
1237 }
1238 }
1239 }
1240
1241 // Calculate NLP gain, result is in Q14
1242 if (aecm->nlpFlag) {
// With fewer than three positive gains, the whole block is zeroed.
1243 if (numPosCoef < 3) {
1244 for (i = 0; i < PART_LEN1; i++) {
1245 efw[i].real = 0;
1246 efw[i].imag = 0;
1247 hnl[i] = 0;
1248 }
1249 } else {
// One bin per iteration. With h = hnl[i] loaded through ptr and (re, im)
// loaded through dr_ptr, the assembly takes one of three paths:
//   h >= 0x4001 (label 3): hnl[i] is rewritten as 0x4000 and (re, im) are
//                          stored to efw unchanged.
//   h <  3277   (label 2): hnl[i], efw real and efw imag are all set to 0.
//   otherwise             : efw = (dfw * h) rounded and shifted right by 14;
//                          hnl[i] is left as it is.
// The file is assembled with .set noreorder, so the indented instruction
// after each branch is its delay slot.
// NOTE(review): dr_ptr is advanced only in the delay slot of the second
// branch, which the label-3 path never reaches, so dr_ptr would fall one bin
// behind after a label-3 iteration. The assignments to hnl[] above never
// exceed ONE_Q14, so that path looks unreachable if ONE_Q14 is 0x4000 -
// confirm.
1250 for (i = 0; i < PART_LEN1; i++) {
1251 #if defined(MIPS_DSP_R1_LE)
// DSP R1 variant: the final shift is done with shra_r.w.
1252 __asm __volatile(
1253 ".set push \n\t"
1254 ".set noreorder \n\t"
1255 "lh %[temp1], 0(%[ptr]) \n\t"
1256 "lh %[temp2], 0(%[dr_ptr]) \n\t"
1257 "slti %[temp4], %[temp1], 0x4001 \n\t"
1258 "beqz %[temp4], 3f \n\t"
1259 " lh %[temp3], 2(%[dr_ptr]) \n\t"
1260 "slti %[temp5], %[temp1], 3277 \n\t"
1261 "bnez %[temp5], 2f \n\t"
1262 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"
1263 "mul %[temp2], %[temp2], %[temp1] \n\t"
1264 "mul %[temp3], %[temp3], %[temp1] \n\t"
1265 "shra_r.w %[temp2], %[temp2], 14 \n\t"
1266 "shra_r.w %[temp3], %[temp3], 14 \n\t"
1267 "b 4f \n\t"
1268 " nop \n\t"
1269 "2: \n\t"
1270 "addu %[temp1], $zero, $zero \n\t"
1271 "addu %[temp2], $zero, $zero \n\t"
1272 "addu %[temp3], $zero, $zero \n\t"
1273 "b 1f \n\t"
1274 " nop \n\t"
1275 "3: \n\t"
1276 "addiu %[temp1], $0, 0x4000 \n\t"
1277 "1: \n\t"
1278 "sh %[temp1], 0(%[ptr]) \n\t"
1279 "4: \n\t"
1280 "sh %[temp2], 0(%[er_ptr]) \n\t"
1281 "sh %[temp3], 2(%[er_ptr]) \n\t"
1282 "addiu %[ptr], %[ptr], 2 \n\t"
1283 "addiu %[er_ptr], %[er_ptr], 4 \n\t"
1284 ".set pop \n\t"
1285 : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
1286 [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [ptr] "+r"(ptr),
1287 [er_ptr] "+r"(er_ptr), [dr_ptr] "+r"(dr_ptr)
1288 :
1289 : "memory", "hi", "lo");
1290 #else
// Variant without DSP instructions: 0x2000 is added before the arithmetic
// shift right by 14; everything else matches the variant above.
1291 __asm __volatile(
1292 ".set push \n\t"
1293 ".set noreorder \n\t"
1294 "lh %[temp1], 0(%[ptr]) \n\t"
1295 "lh %[temp2], 0(%[dr_ptr]) \n\t"
1296 "slti %[temp4], %[temp1], 0x4001 \n\t"
1297 "beqz %[temp4], 3f \n\t"
1298 " lh %[temp3], 2(%[dr_ptr]) \n\t"
1299 "slti %[temp5], %[temp1], 3277 \n\t"
1300 "bnez %[temp5], 2f \n\t"
1301 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"
1302 "mul %[temp2], %[temp2], %[temp1] \n\t"
1303 "mul %[temp3], %[temp3], %[temp1] \n\t"
1304 "addiu %[temp2], %[temp2], 0x2000 \n\t"
1305 "addiu %[temp3], %[temp3], 0x2000 \n\t"
1306 "sra %[temp2], %[temp2], 14 \n\t"
1307 "sra %[temp3], %[temp3], 14 \n\t"
1308 "b 4f \n\t"
1309 " nop \n\t"
1310 "2: \n\t"
1311 "addu %[temp1], $zero, $zero \n\t"
1312 "addu %[temp2], $zero, $zero \n\t"
1313 "addu %[temp3], $zero, $zero \n\t"
1314 "b 1f \n\t"
1315 " nop \n\t"
1316 "3: \n\t"
1317 "addiu %[temp1], $0, 0x4000 \n\t"
1318 "1: \n\t"
1319 "sh %[temp1], 0(%[ptr]) \n\t"
1320 "4: \n\t"
1321 "sh %[temp2], 0(%[er_ptr]) \n\t"
1322 "sh %[temp3], 2(%[er_ptr]) \n\t"
1323 "addiu %[ptr], %[ptr], 2 \n\t"
1324 "addiu %[er_ptr], %[er_ptr], 4 \n\t"
1325 ".set pop \n\t"
1326 : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
1327 [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [ptr] "+r"(ptr),
1328 [er_ptr] "+r"(er_ptr), [dr_ptr] "+r"(dr_ptr)
1329 :
1330 : "memory", "hi", "lo");
1331 #endif
1332 }
1333 }
1334 } else {
1335 // multiply with Wiener coefficients
1336 for (i = 0; i < PART_LEN1; i++) {
1337 efw[i].real = (int16_t)(
1338 WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14));
1339 efw[i].imag = (int16_t)(
1340 WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14));
1341 }
1342 }
1343
// Comfort noise is added to efw only when cngMode equals AecmTrue.
1344 if (aecm->cngMode == AecmTrue) {
1345 ComfortNoise(aecm, ptrDfaClean, efw, hnl);
1346 }
1347
1348 InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
1349
1350 return 0;
1351 }
1352
1353 // Generate comfort noise and add to output signal.
//
// Works in two phases:
//   1. For bins 1..PART_LEN, two bins per loop iteration, update the noise
//      estimate aecm->noiseEst[] from the magnitude in dfa[] and build a noise
//      vector (uReal, uImag) from random table lookups scaled by the estimate
//      and by (ONE_Q14 - lambda[i]).
//   2. Add the noise vector to out[] with a range check on each sum.
//
// Parameters:
//   aecm   - core state; noiseEst, noiseEstCtr, noiseEstTooLowCtr,
//            noiseEstTooHighCtr and seed are updated, dfaCleanQDomain is read.
//   dfa    - magnitude spectrum; indices 1..PART_LEN are read.
//   out    - complex spectrum modified in place; indices 0..PART_LEN.
//   lambda - per-bin values subtracted from ONE_Q14; indices 1..PART_LEN are
//            read. The only caller visible in this file passes hnl[].
ComfortNoise(AecmCore * aecm,const uint16_t * dfa,ComplexInt16 * out,const int16_t * lambda)1354 static void ComfortNoise(AecmCore* aecm,
1355 const uint16_t* dfa,
1356 ComplexInt16* out,
1357 const int16_t* lambda) {
1358 int16_t i;
1359 int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;
1360 int32_t tmp32, tmp321, tnoise, tnoise1;
1361 int32_t tmp322, tmp323, *tmp1;
1362 int16_t* dfap;
1363 int16_t* lambdap;
// Constants kept in variables so they can be passed to the assembly as
// register operands.
1364 const int32_t c2049 = 2049;
1365 const int32_t c359 = 359;
1366 const int32_t c114 = ONE_Q14;
1367
1368 int16_t randW16[PART_LEN];
// Index 0 of uReal and uImag is never written or read in this function.
1369 int16_t uReal[PART_LEN1];
1370 int16_t uImag[PART_LEN1];
1371 int32_t outLShift32;
1372
// Left shift that takes a dfa value to the domain of noiseEst; checked below
// to lie in [0, 16).
1373 int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
1374 int16_t minTrackShift = 9;
1375
1376 RTC_DCHECK_GE(shiftFromNearToNoise, 0);
1377 RTC_DCHECK_LT(shiftFromNearToNoise, 16);
1378
// While noiseEstCtr is below 100 a smaller shift (6 instead of 9) is used, so
// the downward tracking step further below is larger.
1379 if (aecm->noiseEstCtr < 100) {
1380 // Track the minimum more quickly initially.
1381 aecm->noiseEstCtr++;
1382 minTrackShift = 6;
1383 }
1384
1385 // Generate a uniform random array on [0 2^15-1].
1386 WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
1387 int16_t* randW16p = (int16_t*)randW16;
1388 #if defined(MIPS_DSP_R1_LE)
1389 int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;
1390 int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;
1391 #endif // #if defined(MIPS_DSP_R1_LE)
// Running pointers, all starting at index 1. They are advanced inside the
// assembly blocks below, two elements per loop iteration.
// dfa is declared as uint16_t but is read through an int16_t pointer with lh.
1392 tmp1 = (int32_t*)aecm->noiseEst + 1;
1393 dfap = (int16_t*)dfa + 1;
1394 lambdap = (int16_t*)lambda + 1;
1395 // Estimate noise power.
// Each iteration handles bins i and i + 1; the code for the second bin repeats
// the code for the first with tnoise1 in place of tnoise.
1396 for (i = 1; i < PART_LEN1; i += 2) {
1397 // Shift to the noise domain.
// tnoise = noiseEst[i]; outLShift32 = dfa[i] << shiftFromNearToNoise.
1398 __asm __volatile(
1399 "lh %[tmp32], 0(%[dfap]) \n\t"
1400 "lw %[tnoise], 0(%[tmp1]) \n\t"
1401 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"
1402 : [tmp32] "=&r"(tmp32), [outLShift32] "=r"(outLShift32),
1403 [tnoise] "=&r"(tnoise)
1404 : [tmp1] "r"(tmp1), [dfap] "r"(dfap),
1405 [shiftFromNearToNoise] "r"(shiftFromNearToNoise)
1406 : "memory");
1407
1408 if (outLShift32 < tnoise) {
1409 // Reset "too low" counter
1410 aecm->noiseEstTooLowCtr[i] = 0;
1411 // Track the minimum.
1412 if (tnoise < (1 << minTrackShift)) {
1413 // For small values, decrease noiseEst[i] every
1414 // `kNoiseEstIncCount` block. The regular approach below can not
1415 // go further down due to truncation.
1416 aecm->noiseEstTooHighCtr[i]++;
1417 if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
1418 tnoise--;
1419 aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
1420 }
1421 } else {
// tnoise -= (tnoise - outLShift32) >> minTrackShift.
1422 __asm __volatile(
1423 "subu %[tmp32], %[tnoise], %[outLShift32] \n\t"
1424 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"
1425 "subu %[tnoise], %[tnoise], %[tmp32] \n\t"
1426 : [tmp32] "=&r"(tmp32), [tnoise] "+r"(tnoise)
1427 :
1428 [outLShift32] "r"(outLShift32), [minTrackShift] "r"(minTrackShift));
1429 }
1430 } else {
1431 // Reset "too high" counter
1432 aecm->noiseEstTooHighCtr[i] = 0;
1433 // Ramp slowly upwards until we hit the minimum again.
1434 if ((tnoise >> 19) <= 0) {
1435 if ((tnoise >> 11) > 0) {
1436 // Large enough for relative increase
// tnoise = (tnoise * 2049) >> 11.
1437 __asm __volatile(
1438 "mul %[tnoise], %[tnoise], %[c2049] \n\t"
1439 "sra %[tnoise], %[tnoise], 11 \n\t"
1440 : [tnoise] "+r"(tnoise)
1441 : [c2049] "r"(c2049)
1442 : "hi", "lo");
1443 } else {
1444 // Make incremental increases based on size every
1445 // `kNoiseEstIncCount` block
1446 aecm->noiseEstTooLowCtr[i]++;
1447 if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {
// tnoise += 1 + (tnoise >> 9), with the shift taken from the old value.
1448 __asm __volatile(
1449 "sra %[tmp32], %[tnoise], 9 \n\t"
1450 "addi %[tnoise], %[tnoise], 1 \n\t"
1451 "addu %[tnoise], %[tnoise], %[tmp32] \n\t"
1452 : [tnoise] "+r"(tnoise), [tmp32] "=&r"(tmp32)
1453 :);
1454 aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
1455 }
1456 }
1457 } else {
1458 // Avoid overflow.
1459 // Multiplication with 2049 will cause wrap around. Scale
1460 // down first and then multiply
1461 __asm __volatile(
1462 "sra %[tnoise], %[tnoise], 11 \n\t"
1463 "mul %[tnoise], %[tnoise], %[c2049] \n\t"
1464 : [tnoise] "+r"(tnoise)
1465 : [c2049] "r"(c2049)
1466 : "hi", "lo");
1467 }
1468 }
1469
1470 // Shift to the noise domain.
// Second bin of the pair: tnoise1 = noiseEst[i + 1] and
// outLShift32 = dfa[i + 1] << shiftFromNearToNoise. dfap moves on by two
// elements here, ready for the next iteration.
1471 __asm __volatile(
1472 "lh %[tmp32], 2(%[dfap]) \n\t"
1473 "lw %[tnoise1], 4(%[tmp1]) \n\t"
1474 "addiu %[dfap], %[dfap], 4 \n\t"
1475 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"
1476 : [tmp32] "=&r"(tmp32), [dfap] "+r"(dfap),
1477 [outLShift32] "=r"(outLShift32), [tnoise1] "=&r"(tnoise1)
1478 : [tmp1] "r"(tmp1), [shiftFromNearToNoise] "r"(shiftFromNearToNoise)
1479 : "memory");
1480
1481 if (outLShift32 < tnoise1) {
1482 // Reset "too low" counter
1483 aecm->noiseEstTooLowCtr[i + 1] = 0;
1484 // Track the minimum.
1485 if (tnoise1 < (1 << minTrackShift)) {
1486 // For small values, decrease noiseEst[i] every
1487 // `kNoiseEstIncCount` block. The regular approach below can not
1488 // go further down due to truncation.
1489 aecm->noiseEstTooHighCtr[i + 1]++;
1490 if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {
1491 tnoise1--;
1492 aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter
1493 }
1494 } else {
1495 __asm __volatile(
1496 "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t"
1497 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"
1498 "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t"
1499 : [tmp32] "=&r"(tmp32), [tnoise1] "+r"(tnoise1)
1500 :
1501 [outLShift32] "r"(outLShift32), [minTrackShift] "r"(minTrackShift));
1502 }
1503 } else {
1504 // Reset "too high" counter
1505 aecm->noiseEstTooHighCtr[i + 1] = 0;
1506 // Ramp slowly upwards until we hit the minimum again.
1507 if ((tnoise1 >> 19) <= 0) {
1508 if ((tnoise1 >> 11) > 0) {
1509 // Large enough for relative increase
1510 __asm __volatile(
1511 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"
1512 "sra %[tnoise1], %[tnoise1], 11 \n\t"
1513 : [tnoise1] "+r"(tnoise1)
1514 : [c2049] "r"(c2049)
1515 : "hi", "lo");
1516 } else {
1517 // Make incremental increases based on size every
1518 // `kNoiseEstIncCount` block
1519 aecm->noiseEstTooLowCtr[i + 1]++;
1520 if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {
1521 __asm __volatile(
1522 "sra %[tmp32], %[tnoise1], 9 \n\t"
1523 "addi %[tnoise1], %[tnoise1], 1 \n\t"
1524 "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t"
1525 : [tnoise1] "+r"(tnoise1), [tmp32] "=&r"(tmp32)
1526 :);
1527 aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter
1528 }
1529 }
1530 } else {
1531 // Avoid overflow.
1532 // Multiplication with 2049 will cause wrap around. Scale
1533 // down first and then multiply
1534 __asm __volatile(
1535 "sra %[tnoise1], %[tnoise1], 11 \n\t"
1536 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"
1537 : [tnoise1] "+r"(tnoise1)
1538 : [c2049] "r"(c2049)
1539 : "hi", "lo");
1540 }
1541 }
1542
// Store both updated estimates back through tmp1, then compute
//   tmp16  = ONE_Q14 - lambda[i],     tmp161 = ONE_Q14 - lambda[i + 1],
//   tmp32  = tnoise  >> shiftFromNearToNoise,
//   tmp321 = tnoise1 >> shiftFromNearToNoise,
// and advance lambdap and tmp1 by two elements each.
1543 __asm __volatile(
1544 "lh %[tmp16], 0(%[lambdap]) \n\t"
1545 "lh %[tmp161], 2(%[lambdap]) \n\t"
1546 "sw %[tnoise], 0(%[tmp1]) \n\t"
1547 "sw %[tnoise1], 4(%[tmp1]) \n\t"
1548 "subu %[tmp16], %[c114], %[tmp16] \n\t"
1549 "subu %[tmp161], %[c114], %[tmp161] \n\t"
1550 "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t"
1551 "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t"
1552 "addiu %[lambdap], %[lambdap], 4 \n\t"
1553 "addiu %[tmp1], %[tmp1], 8 \n\t"
1554 : [tmp16] "=&r"(tmp16), [tmp161] "=&r"(tmp161), [tmp1] "+r"(tmp1),
1555 [tmp32] "=&r"(tmp32), [tmp321] "=&r"(tmp321), [lambdap] "+r"(lambdap)
1556 : [tnoise] "r"(tnoise), [tnoise1] "r"(tnoise1), [c114] "r"(c114),
1557 [shiftFromNearToNoise] "r"(shiftFromNearToNoise)
1558 : "memory");
1559
// Clamp the down-shifted values to 32767 and, when clamping happened,
// overwrite the stored estimate with the clamped value shifted back up.
1560 if (tmp32 > 32767) {
1561 tmp32 = 32767;
1562 aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
1563 }
1564 if (tmp321 > 32767) {
1565 tmp321 = 32767;
1566 aecm->noiseEst[i + 1] = tmp321 << shiftFromNearToNoise;
1567 }
1568
// nrsh1 = (tmp32 * tmp16) >> 14 and nrsh2 = (tmp321 * tmp161) >> 14.
// NOTE(review): the mul instructions write tmp32 and tmp321, which are
// declared as input-only operands. Their C values are reloaded by the next
// assembly block before being used again, but the constraint list does not
// describe the write - check against the GCC extended asm rules.
1569 __asm __volatile(
1570 "mul %[tmp32], %[tmp32], %[tmp16] \n\t"
1571 "mul %[tmp321], %[tmp321], %[tmp161] \n\t"
1572 "sra %[nrsh1], %[tmp32], 14 \n\t"
1573 "sra %[nrsh2], %[tmp321], 14 \n\t"
1574 : [nrsh1] "=&r"(nrsh1), [nrsh2] "=r"(nrsh2)
1575 : [tmp16] "r"(tmp16), [tmp161] "r"(tmp161), [tmp32] "r"(tmp32),
1576 [tmp321] "r"(tmp321)
1577 : "memory", "hi", "lo");
1578
// Turn two random values into table indices: index = (rand * 359) >> 15.
// randW16p advances by two elements.
1579 __asm __volatile(
1580 "lh %[tmp32], 0(%[randW16p]) \n\t"
1581 "lh %[tmp321], 2(%[randW16p]) \n\t"
1582 "addiu %[randW16p], %[randW16p], 4 \n\t"
1583 "mul %[tmp32], %[tmp32], %[c359] \n\t"
1584 "mul %[tmp321], %[tmp321], %[c359] \n\t"
1585 "sra %[tmp16], %[tmp32], 15 \n\t"
1586 "sra %[tmp161], %[tmp321], 15 \n\t"
1587 : [randW16p] "+r"(randW16p), [tmp32] "=&r"(tmp32), [tmp16] "=r"(tmp16),
1588 [tmp161] "=r"(tmp161), [tmp321] "=&r"(tmp321)
1589 : [c359] "r"(c359)
1590 : "memory", "hi", "lo");
1591
// Look up the cosine and sine table entries for both indices. The DSP R1
// variant doubles each index to get a byte offset and uses lhx.
1592 #if !defined(MIPS_DSP_R1_LE)
1593 tmp32 = WebRtcAecm_kCosTable[tmp16];
1594 tmp321 = WebRtcAecm_kSinTable[tmp16];
1595 tmp322 = WebRtcAecm_kCosTable[tmp161];
1596 tmp323 = WebRtcAecm_kSinTable[tmp161];
1597 #else
1598 __asm __volatile(
1599 "sll %[tmp16], %[tmp16], 1 \n\t"
1600 "sll %[tmp161], %[tmp161], 1 \n\t"
1601 "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t"
1602 "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t"
1603 "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t"
1604 "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t"
1605 : [tmp32] "=&r"(tmp32), [tmp321] "=&r"(tmp321), [tmp322] "=&r"(tmp322),
1606 [tmp323] "=&r"(tmp323)
1607 : [kCosTablep] "r"(kCosTablep), [tmp16] "r"(tmp16),
1608 [tmp161] "r"(tmp161), [kSinTablep] "r"(kSinTablep)
1609 : "memory");
1610 #endif
// Scale the table entries by the noise level:
//   tmp32  = (cos1 *  nrsh1) >> 13,   tmp321 = (sin1 * -nrsh1) >> 13,
//   tmp322 = (cos2 *  nrsh2) >> 13,   tmp323 = (sin2 * -nrsh2) >> 13.
1611 __asm __volatile(
1612 "mul %[tmp32], %[tmp32], %[nrsh1] \n\t"
1613 "negu %[tmp162], %[nrsh1] \n\t"
1614 "mul %[tmp322], %[tmp322], %[nrsh2] \n\t"
1615 "negu %[tmp163], %[nrsh2] \n\t"
1616 "sra %[tmp32], %[tmp32], 13 \n\t"
1617 "mul %[tmp321], %[tmp321], %[tmp162] \n\t"
1618 "sra %[tmp322], %[tmp322], 13 \n\t"
1619 "mul %[tmp323], %[tmp323], %[tmp163] \n\t"
1620 "sra %[tmp321], %[tmp321], 13 \n\t"
1621 "sra %[tmp323], %[tmp323], 13 \n\t"
1622 : [tmp32] "+r"(tmp32), [tmp321] "+r"(tmp321), [tmp162] "=&r"(tmp162),
1623 [tmp322] "+r"(tmp322), [tmp323] "+r"(tmp323), [tmp163] "=&r"(tmp163)
1624 : [nrsh1] "r"(nrsh1), [nrsh2] "r"(nrsh2)
1625 : "hi", "lo");
1626 // Tables are in Q13.
1627 uReal[i] = (int16_t)tmp32;
1628 uImag[i] = (int16_t)tmp321;
1629 uReal[i + 1] = (int16_t)tmp322;
1630 uImag[i + 1] = (int16_t)tmp323;
1631 }
1632
// Add the noise to out[]. Each sum tt is kept when it fits in int16_t, which
// is tested by comparing its sign word (tt >> 31) with tt >> 15. Otherwise
// the fallback 16384 ^ sgn is stored.
// NOTE(review): that fallback evaluates to 16384 for a positive tt and to
// -16385 for a negative one (sgn == -1, assuming an arithmetic right shift),
// not to the int16_t limits - confirm this is intended.
// Bin 0 gets no noise; bins 1..PART_LEN-1 get real and imaginary noise; bin
// PART_LEN gets real noise only.
1633 int32_t tt, sgn;
1634 tt = out[0].real;
1635 sgn = ((int)tt) >> 31;
1636 out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1637 tt = out[0].imag;
1638 sgn = ((int)tt) >> 31;
1639 out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1640 for (i = 1; i < PART_LEN; i++) {
1641 tt = out[i].real + uReal[i];
1642 sgn = ((int)tt) >> 31;
1643 out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1644 tt = out[i].imag + uImag[i];
1645 sgn = ((int)tt) >> 31;
1646 out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1647 }
1648 tt = out[PART_LEN].real + uReal[PART_LEN];
1649 sgn = ((int)tt) >> 31;
1650 out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1651 tt = out[PART_LEN].imag;
1652 sgn = ((int)tt) >> 31;
1653 out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1654 }
1655
1656 } // namespace webrtc
1657