1 /******************************************************************************
2 * *
3 * Copyright (C) 2023 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 #include <string.h>
22 #include "ixheaac_type_def.h"
23 #include "ixheaace_adjust_threshold_data.h"
24 #include "iusace_cnst.h"
25 #include "iusace_block_switch_const.h"
26 #include "iusace_rom.h"
27 #include "iusace_bitbuffer.h"
28
29 /* DRC */
30 #include "impd_drc_common_enc.h"
31 #include "impd_drc_uni_drc.h"
32 #include "impd_drc_tables.h"
33 #include "impd_drc_api.h"
34 #include "impd_drc_uni_drc_eq.h"
35 #include "impd_drc_uni_drc_filter_bank.h"
36 #include "impd_drc_gain_enc.h"
37 #include "impd_drc_struct_def.h"
38
39 #include "iusace_tns_usac.h"
40 #include "iusace_psy_mod.h"
41 #include "iusace_config.h"
42 #include "iusace_fft.h"
43 #include "iusace_basic_ops_flt.h"
44 #include "ixheaac_constants.h"
45 #include "ixheaace_aac_constants.h"
46 #include "ixheaac_basic_ops32.h"
47 #include "ixheaace_common_utils.h"
48 #include "ixheaac_error_standards.h"
49 #include "ixheaace_error_codes.h"
50
/* Radix-4 digit reversal used to reorder FFT input indices.
 * Reverses the order of 2-bit groups (radix-4 digits) within each 16-bit
 * half of (i) — pairs within nibbles, nibbles within bytes, bytes within
 * halfwords — then shifts the result right by (m) so that only the digits
 * relevant for the transform length remain. Result is written to (j).
 * NOTE(review): there is no final 16-bit swap, so the index is assumed to
 * fit the lower half after the shift — confirm against max FFT length. */
#define DIG_REV(i, m, j) \
  do { \
    unsigned _ = (i); \
    _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
    _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
    _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
    (j) = _ >> (m); \
  } while (0)
59
iusace_calc_norm(WORD32 a)60 static PLATFORM_INLINE WORD8 iusace_calc_norm(WORD32 a) {
61 WORD8 norm_val;
62
63 if (a == 0) {
64 norm_val = 31;
65 } else {
66 if (a == (WORD32)0xffffffffL) {
67 norm_val = 31;
68 } else {
69 if (a < 0) {
70 a = ~a;
71 }
72 for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
73 a <<= 1;
74 }
75 }
76 }
77
78 return norm_val;
79 }
80
iusace_complex_3point_fft(FLOAT32 * ptr_in,FLOAT32 * ptr_out)81 static PLATFORM_INLINE VOID iusace_complex_3point_fft(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
82 FLOAT32 add_r, sub_r;
83 FLOAT32 add_i, sub_i;
84 FLOAT32 x01r, x01i, temp;
85 FLOAT32 p1, p2, p3, p4;
86 FLOAT64 sinmu;
87
88 sinmu = 0.866025403784439;
89
90 x01r = ptr_in[0] + ptr_in[2];
91 x01i = ptr_in[1] + ptr_in[3];
92
93 add_r = ptr_in[2] + ptr_in[4];
94 add_i = ptr_in[3] + ptr_in[5];
95
96 sub_r = ptr_in[2] - ptr_in[4];
97 sub_i = ptr_in[3] - ptr_in[5];
98
99 p1 = add_r / (FLOAT32)2.0;
100 p4 = add_i / (FLOAT32)2.0;
101 p2 = (FLOAT32)((FLOAT64)sub_i * sinmu);
102 p3 = (FLOAT32)((FLOAT64)sub_r * sinmu);
103
104 temp = ptr_in[0] - p1;
105
106 ptr_out[0] = x01r + ptr_in[4];
107 ptr_out[1] = x01i + ptr_in[5];
108 ptr_out[2] = temp + p2;
109 ptr_out[3] = (ptr_in[1] - p3) - p4;
110 ptr_out[4] = temp - p2;
111 ptr_out[5] = (ptr_in[1] + p3) - p4;
112
113 return;
114 }
115
/* In-place complex FFT for power-of-two lengths on interleaved re/im data.
 * Decimation-in-time radix-4 with a final radix-2 stage when log2(nlength)
 * is odd. Stage 1 loads in digit-reversed order into the scratch buffer
 * scratch_fft_p2_y (2*nlength floats); intermediate stages run in that
 * buffer; the result is copied back into ptr_x at the end.
 * Twiddle factors come from iusace_twiddle_table_fft_32x32; the sin part
 * appears to live at a +257 offset from the cos part — NOTE(review):
 * confirm against the table definition in iusace_rom. */
VOID iusace_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength, FLOAT32 *scratch_fft_p2_y) {
  WORD32 i, j, k, n_stages, h2;
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
  FLOAT32 tmp;
  WORD32 del, nodespacing, in_loop_cnt;
  WORD32 not_power_4;
  WORD32 dig_rev_shift;
  FLOAT32 *y = scratch_fft_p2_y;
  WORD32 mpass = nlength;
  WORD32 npoints = nlength;
  FLOAT32 *ptr_y = y;
  const FLOAT64 *ptr_w;

  /* 30 - norm(n) == log2(n); odd log2 means n = 2 * 4^k, handled by an
   * extra radix-2 pass at the end (not_power_4). */
  dig_rev_shift = iusace_calc_norm(mpass) + 1 - 16;
  n_stages = 30 - iusace_calc_norm(mpass);
  not_power_4 = n_stages & 1;

  n_stages = n_stages >> 1; /* number of radix-4 stages */

  ptr_w = iusace_twiddle_table_fft_32x32;

  if (dig_rev_shift < 0) {
    dig_rev_shift = 0;
  }

  /* Stage 1: radix-4 butterflies over digit-reversed input, writing the
   * four outputs contiguously into the scratch buffer. */
  for (i = 0; i < npoints; i += 4) {
    FLOAT32 *inp = ptr_x;
    FLOAT32 tmk;

    DIG_REV(i, dig_rev_shift, h2);
    if (not_power_4) {
      /* round the reversed index up to even for the trailing radix-2 pass */
      h2 += 1;
      h2 &= ~1;
    }
    inp += (h2);

    x0r = *inp;
    x0i = *(inp + 1);
    inp += (npoints >> 1);

    x1r = *inp;
    x1i = *(inp + 1);
    inp += (npoints >> 1);

    x2r = *inp;
    x2i = *(inp + 1);
    inp += (npoints >> 1);

    x3r = *inp;
    x3i = *(inp + 1);

    /* radix-4 butterfly: a - (a - b) == sums/differences without extra
     * temporaries (tmk - b reproduces a - 2b style updates) */
    x0r = x0r + x2r;
    x0i = x0i + x2i;

    tmk = x0r - x2r;
    x2r = tmk - x2r;
    tmk = x0i - x2i;
    x2i = tmk - x2i;

    x1r = x1r + x3r;
    x1i = x1i + x3i;

    tmk = x1r - x3r;
    x3r = tmk - x3r;
    tmk = x1i - x3i;
    x3i = tmk - x3i;

    x0r = x0r + x1r;
    x0i = x0i + x1i;

    tmk = x0r - x1r;
    x1r = tmk - x1r;
    tmk = x0i - x1i;
    x1i = tmk - x1i;

    x2r = x2r + x3i;
    x2i = x2i - x3r;

    tmk = x2r - x3i;
    x3i = tmk - x3i;
    tmk = x2i + x3r;
    x3r = tmk + x3r;

    *ptr_y++ = x0r;
    *ptr_y++ = x0i;
    *ptr_y++ = x2r;
    *ptr_y++ = x2i;
    *ptr_y++ = x1r;
    *ptr_y++ = x1i;
    *ptr_y++ = x3i;
    *ptr_y++ = x3r;
  }
  ptr_y -= 2 * npoints; /* rewind to start of scratch */
  del = 4;              /* butterfly span, quadruples per stage */
  nodespacing = 64;     /* twiddle stride, quarters per stage */
  in_loop_cnt = npoints >> 4;

  /* Remaining radix-4 stages. The j-loops split the twiddle range into
   * four segments so each can use a specialized (sign/offset adjusted)
   * form of the twiddle lookups and complex multiplies. */
  for (i = n_stages - 1; i > 0; i--) {
    const FLOAT64 *twiddles = ptr_w;
    FLOAT32 *data = ptr_y;
    FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
    WORD32 sec_loop_cnt;

    /* j == 0 column: twiddles are all 1, plain butterflies */
    for (k = in_loop_cnt; k != 0; k--) {
      x0r = (*data);
      x0i = (*(data + 1));
      data += ((SIZE_T)del << 1);

      x1r = (*data);
      x1i = (*(data + 1));
      data += ((SIZE_T)del << 1);

      x2r = (*data);
      x2i = (*(data + 1));
      data += ((SIZE_T)del << 1);

      x3r = (*data);
      x3i = (*(data + 1));
      data -= 3 * (del << 1);

      x0r = x0r + x2r;
      x0i = x0i + x2i;
      x2r = x0r - (x2r * 2);
      x2i = x0i - (x2i * 2);
      x1r = x1r + x3r;
      x1i = x1i + x3i;
      x3r = x1r - (x3r * 2);
      x3i = x1i - (x3i * 2);

      x0r = x0r + x1r;
      x0i = x0i + x1i;
      x1r = x0r - (x1r * 2);
      x1i = x0i - (x1i * 2);
      x2r = x2r + x3i;
      x2i = x2i - x3r;
      x3i = x2r - (x3i * 2);
      x3r = x2i + (x3r * 2);

      *data = x0r;
      *(data + 1) = x0i;
      data += ((SIZE_T)del << 1);

      *data = x2r;
      *(data + 1) = x2i;
      data += ((SIZE_T)del << 1);

      *data = x1r;
      *(data + 1) = x1i;
      data += ((SIZE_T)del << 1);

      *data = x3i;
      *(data + 1) = x3r;
      data += ((SIZE_T)del << 1);
    }
    data = ptr_y + 2;

    /* alternating-series approximation of (nodespacing*del)/3, the point
     * where 3*j crosses a twiddle-table boundary */
    sec_loop_cnt = (nodespacing * del);
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
                   (sec_loop_cnt / 256);

    /* Segment 1: j, 2j and 3j all within the first table quadrant */
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1));
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1));
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1); /* x0 loaded after the multiplies */

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * (del << 1);

        /* complex multiply by conj-style twiddles in FLOAT64 precision */
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_2) - ixheaace_dmult((FLOAT64)x2i, w_5));
        x2i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r, w_5), (FLOAT64)x2i, w_2);
        x2r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_3) - ixheaace_dmult((FLOAT64)x3i, w_6));
        x3i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r, w_6), (FLOAT64)x3i, w_3);
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        x1r = x1r + x3r;
        x1i = x1i + x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i - (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    /* Segment 2: 3j has wrapped past the quadrant; w_3/w_6 lookups and the
     * x3 multiply change sign/offset accordingly */
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1));
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * (del << 1);

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_2) - ixheaace_dmult((FLOAT64)x2i, w_5));
        x2i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r, w_5), (FLOAT64)x2i, w_2);
        x2r = tmp;

        /* wrapped-quadrant form of the x3 twiddle multiply */
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_6) + ixheaace_dmult((FLOAT64)x3i, w_3));
        x3i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6));
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        x1r = x1r + x3r;
        x1i = x1i + x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i - (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    /* Segment 3: both 2j and 3j wrapped; x2 and x3 multiplies use the
     * wrapped form */
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * (del << 1);

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1r, w_4), x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_5) + ixheaace_dmult((FLOAT64)x2i, w_2));
        x2i = (FLOAT32)(-ixheaace_dmult(x2r, w_2) + ixheaace_dmult(x2i, w_5));
        x2r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_6) + ixheaace_dmult((FLOAT64)x3i, w_3));
        x3i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6));
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        x1r = x1r + x3r;
        x1i = x1i + x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i - (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    /* Segment 4: 3j wrapped twice (offset -512); x3 multiply and the x1/x3
     * butterfly change sign accordingly */
    for (; j < nodespacing * del; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * ((SIZE_T)del << 1);

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_5) + ixheaace_dmult((FLOAT64)x2i, w_2));
        x2i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x2r, w_2) + ixheaace_dmult((FLOAT64)x2i, w_5));
        x2r = tmp;

        tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6));
        x3i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r, w_6), (FLOAT64)x3i, w_3);
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        /* note the sign flip on x3i relative to the earlier segments */
        x1r = x1r + x3r;
        x1i = x1i - x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i + (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    nodespacing >>= 2;
    del <<= 2;
    in_loop_cnt >>= 2;
  }
  /* Final radix-2 stage for lengths of the form 2 * 4^k. Split in two loops
   * because the second half of the butterflies needs the conjugate twiddle. */
  if (not_power_4) {
    const FLOAT64 *twiddles = ptr_w;
    nodespacing <<= 1;

    for (j = del / 2; j != 0; j--) {
      FLOAT64 w_1 = *twiddles;
      FLOAT64 w_4 = *(twiddles + 257);
      twiddles += nodespacing;

      x0r = *ptr_y;
      x0i = *(ptr_y + 1);
      ptr_y += ((SIZE_T)del << 1);

      x1r = *ptr_y;
      x1i = *(ptr_y + 1);

      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
      x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
      x1r = tmp;

      *ptr_y = (x0r) - (x1r);
      *(ptr_y + 1) = (x0i) - (x1i);
      ptr_y -= ((SIZE_T)del << 1);

      *ptr_y = (x0r) + (x1r);
      *(ptr_y + 1) = (x0i) + (x1i);
      ptr_y += 2;
    }
    twiddles = ptr_w;
    for (j = del / 2; j != 0; j--) {
      FLOAT64 w_1 = *twiddles;
      FLOAT64 w_4 = *(twiddles + 257);
      twiddles += nodespacing;

      x0r = *ptr_y;
      x0i = *(ptr_y + 1);
      ptr_y += ((SIZE_T)del << 1);

      x1r = *ptr_y;
      x1i = *(ptr_y + 1);

      /* conjugate twiddle for the upper half */
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_4) + ixheaace_dmult((FLOAT64)x1i, w_1));
      x1i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1r, w_1) + ixheaace_dmult((FLOAT64)x1i, w_4));
      x1r = tmp;

      *ptr_y = (x0r) - (x1r);
      *(ptr_y + 1) = (x0i) - (x1i);
      ptr_y -= ((SIZE_T)del << 1);

      *ptr_y = (x0r) + (x1r);
      *(ptr_y + 1) = (x0i) + (x1i);
      ptr_y += 2;
    }
  }

  /* copy the transformed data back into the caller's buffer */
  for (i = 0; i < nlength; i++) {
    *(ptr_x + 2 * i) = y[2 * i];
    *(ptr_x + 2 * i + 1) = y[2 * i + 1];
  }
}
640
/* In-place complex FFT for lengths of the form 3 * 2^k (mixed radix).
 * Strategy: split the input into three interleaved sub-sequences, run a
 * power-of-two FFT on each, apply inter-factor twiddles, then combine with
 * nlength/3 independent 3-point FFTs and de-interleave the result.
 * Uses pstr_scratch->p_fft_p3_data_3 / p_fft_p3_y as work buffers. */
static VOID iusace_complex_fft_p3(FLOAT32 *data, WORD32 nlength,
                                  iusace_scratch_mem *pstr_scratch) {
  WORD32 i, j;
  FLOAT32 *data_3 = pstr_scratch->p_fft_p3_data_3;
  FLOAT32 *y = pstr_scratch->p_fft_p3_y;
  WORD32 cnfac;
  WORD32 mpass = nlength;
  FLOAT32 *ptr_x = data;
  FLOAT32 *ptr_y = y;

  /* strip all factors of 3; mpass becomes the power-of-two sub-FFT length */
  cnfac = 0;
  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }

  /* power-of-two FFT on each of the 3*cnfac interleaved sub-sequences
   * (stride 3 complex samples, offset i) */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      data_3[2 * j] = data[3 * (2 * j) + (2 * i)];
      data_3[2 * j + 1] = data[3 * (2 * j) + 1 + (2 * i)];
    }
    iusace_complex_fft_p2(data_3, mpass, pstr_scratch->p_fft_p2_y);

    for (j = 0; j < mpass; j++) {
      data[3 * (2 * j) + (2 * i)] = data_3[2 * j];
      data[3 * (2 * j) + 1 + (2 * i)] = data_3[2 * j + 1];
    }
  }

  /* inter-factor twiddle rotation, three bins per iteration */
  {
    const FLOAT64 *w1r, *w1i;
    FLOAT32 tmp;
    w1r = iusace_twiddle_table_3pr;
    w1i = iusace_twiddle_table_3pi;

    for (i = 0; i < nlength; i += 3) {
      tmp = (FLOAT32)((FLOAT64)data[2 * i] * (*w1r) - (FLOAT64)data[2 * i + 1] * (*w1i));
      data[2 * i + 1] =
          (FLOAT32)((FLOAT64)data[2 * i] * (*w1i) + (FLOAT64)data[2 * i + 1] * (*w1r));
      data[2 * i] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 1) + 1] * (*w1i));
      data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 1) + 1] * (*w1r));
      data[2 * (i + 1)] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 2) + 1] * (*w1i));
      data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 2) + 1] * (*w1r));
      data[2 * (i + 2)] = tmp;

      /* table stride tied to a 128-entry layout — NOTE(review): this only
       * steps forward when mpass <= 128; confirm supported nlength range */
      w1r += 3 * (128 / mpass - 1) + 1;
      w1i += 3 * (128 / mpass - 1) + 1;
    }
  }

  /* combine: one 3-point FFT per output group */
  for (i = 0; i < mpass; i++) {
    iusace_complex_3point_fft(ptr_x, ptr_y);

    ptr_x = ptr_x + 6;
    ptr_y = ptr_y + 6;
  }

  /* de-interleave the 3-point FFT outputs into the three output thirds */
  for (i = 0; i < mpass; i++) {
    data[2 * i] = y[6 * i];
    data[2 * i + 1] = y[6 * i + 1];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + mpass)] = y[6 * i + 2];
    data[2 * (i + mpass) + 1] = y[6 * i + 3];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + 2 * mpass)] = y[6 * i + 4];
    data[2 * (i + 2 * mpass) + 1] = y[6 * i + 5];
  }
}
727
/* Same mixed-radix 3 * 2^k FFT as iusace_complex_fft_p3, but with the work
 * buffers on the stack instead of the scratch structure, for callers that
 * have no iusace_scratch_mem available.
 * NOTE(review): ~15 KB of stack (800 + 1024 + 2048 floats); the fixed sizes
 * bound the supported nlength — confirm callers never exceed them. */
VOID iusace_complex_fft_p3_no_scratch(FLOAT32 *data, WORD32 nlength) {
  WORD32 i, j;

  FLOAT32 data_3[800];      /* one stride-3 sub-sequence (re/im interleaved) */
  FLOAT32 y[1024];          /* 3-point FFT outputs */
  FLOAT32 p_fft_p2_y[2048]; /* scratch for the power-of-two sub-FFTs */
  WORD32 cnfac;
  WORD32 mpass = nlength;
  FLOAT32 *ptr_x = data;
  FLOAT32 *ptr_y = y;

  /* strip all factors of 3; mpass becomes the power-of-two sub-FFT length */
  cnfac = 0;
  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }

  /* power-of-two FFT on each interleaved sub-sequence */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      data_3[2 * j] = data[3 * (2 * j) + (2 * i)];
      data_3[2 * j + 1] = data[3 * (2 * j) + 1 + (2 * i)];
    }
    iusace_complex_fft_p2(data_3, mpass, p_fft_p2_y);

    for (j = 0; j < mpass; j++) {
      data[3 * (2 * j) + (2 * i)] = data_3[2 * j];
      data[3 * (2 * j) + 1 + (2 * i)] = data_3[2 * j + 1];
    }
  }

  /* inter-factor twiddle rotation, three bins per iteration */
  {
    const FLOAT64 *w1r, *w1i;
    FLOAT32 tmp;
    w1r = iusace_twiddle_table_3pr;
    w1i = iusace_twiddle_table_3pi;

    for (i = 0; i < nlength; i += 3) {
      tmp = (FLOAT32)((FLOAT64)data[2 * i] * (*w1r) - (FLOAT64)data[2 * i + 1] * (*w1i));
      data[2 * i + 1] =
          (FLOAT32)((FLOAT64)data[2 * i] * (*w1i) + (FLOAT64)data[2 * i + 1] * (*w1r));
      data[2 * i] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 1) + 1] * (*w1i));
      data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 1) + 1] * (*w1r));
      data[2 * (i + 1)] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 2) + 1] * (*w1i));
      data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 2) + 1] * (*w1r));
      data[2 * (i + 2)] = tmp;

      /* table stride tied to a 128-entry layout — NOTE(review): only steps
       * forward when mpass <= 128; confirm supported nlength range */
      w1r += 3 * (128 / mpass - 1) + 1;
      w1i += 3 * (128 / mpass - 1) + 1;
    }
  }

  /* combine: one 3-point FFT per output group */
  for (i = 0; i < mpass; i++) {
    iusace_complex_3point_fft(ptr_x, ptr_y);

    ptr_x = ptr_x + 6;
    ptr_y = ptr_y + 6;
  }

  /* de-interleave the 3-point FFT outputs into the three output thirds */
  for (i = 0; i < mpass; i++) {
    data[2 * i] = y[6 * i];
    data[2 * i + 1] = y[6 * i + 1];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + mpass)] = y[6 * i + 2];
    data[2 * (i + mpass) + 1] = y[6 * i + 3];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + 2 * mpass)] = y[6 * i + 4];
    data[2 * (i + 2 * mpass) + 1] = y[6 * i + 5];
  }
}
815
/* MDCT pre-processing: folds the npoints-sample windowed input into
 * npoints/4 complex values and applies the pre-twiddle, writing the
 * interleaved re/im result into fft_ptr for the complex FFT.
 * ptr_in   : time-domain input, length npoints (modified in place when
 *            tx_flag == 0, see below)
 * npoints  : window length (callers pass 2 * MDCT size)
 * cos_ptr/sin_ptr : pre/post twiddle tables, npoints/4 entries consumed
 * tx_flag  : 0 selects the MDST-via-MDCT path (input spectrally reversed
 *            first); nonzero is the plain MDCT path */
static VOID iusace_calc_pre_twid_enc(FLOAT64 *ptr_in, FLOAT32 *fft_ptr, WORD32 npoints,
                                     const FLOAT64 *cos_ptr, const FLOAT64 *sin_ptr,
                                     const WORD32 tx_flag) {
  WORD32 i, n;
  WORD32 b = npoints >> 1;
  WORD32 a = npoints - b; /* == b for even npoints; kept symbolic */
  WORD32 nlength = npoints >> 2;
  FLOAT64 tempr, tempi;

  if (tx_flag == 0) {
    FLOAT64 norm;
    for (i = 0; i < b; i++) {
      norm = ptr_in[i]; /* reuse MDCT: spectrally reverse all bins */
      ptr_in[i] = ptr_in[npoints - 1 - i];
      ptr_in[npoints - 1 - i] = norm;
    }
  }
  /* fold the four window quarters into one complex quarter; the i < b/4 and
   * i < a/4 splits pick add vs. subtract depending on which quarter the
   * source sample falls in */
  for (i = 0; i < nlength; i++) {
    n = npoints / 2 - 1 - 2 * i;
    if (i < b / 4) {
      tempr = ptr_in[a / 2 + n] + ptr_in[npoints + a / 2 - 1 - n];
    } else {
      tempr = ptr_in[a / 2 + n] - ptr_in[a / 2 - 1 - n];
    }
    n = 2 * i;
    if (i < a / 4) {
      tempi = ptr_in[a / 2 + n] - ptr_in[a / 2 - 1 - n];
    } else {
      tempi = ptr_in[a / 2 + n] + ptr_in[npoints + a / 2 - 1 - n];
    }

    /* pre-twiddle: (tempr + j*tempi) * (cos - j*sin), result to FLOAT32 */
    fft_ptr[2 * i] = (FLOAT32)(tempr * (*cos_ptr) + tempi * (*sin_ptr));
    fft_ptr[2 * i + 1] = (FLOAT32)(tempi * (*cos_ptr++) - tempr * (*sin_ptr++));
  }
}
851
iusace_complex_fft(FLOAT32 * data,WORD32 nlength,iusace_scratch_mem * pstr_scratch)852 VOID iusace_complex_fft(FLOAT32 *data, WORD32 nlength, iusace_scratch_mem *pstr_scratch) {
853 if (nlength & (nlength - 1)) {
854 iusace_complex_fft_p3(data, nlength, pstr_scratch);
855 } else {
856 iusace_complex_fft_p2(data, nlength, pstr_scratch->p_fft_p2_y);
857 }
858 }
859
/* MDCT post-processing: applies the post-twiddle to the npoints/4 complex
 * FFT outputs in fft_ptr and weaves real/imag parts into the npoints/2
 * spectral coefficients of ptr_out (even indices from the front, odd
 * indices mirrored from the back, with sign flips).
 * tx_flag == 0 additionally negates every even-indexed output (MDST via
 * MDCT); nonzero is the plain MDCT path. Tables are consumed in step with
 * iusace_calc_pre_twid_enc. */
static VOID iusace_calc_post_twid_enc(FLOAT64 *ptr_out, FLOAT32 *fft_ptr, WORD32 npoints,
                                      const FLOAT64 *cos_ptr, const FLOAT64 *sin_ptr,
                                      const WORD32 tx_flag) {
  WORD32 i;
  WORD32 nlength = npoints >> 2;
  FLOAT64 tempr, tempi;

  /* post-twiddle FFT output and then get output data */
  for (i = 0; i < nlength; i++) {
    /* (re + j*im) * (cos - j*sin), scaled by 2, in FLOAT64 precision */
    tempr =
        2 * ((FLOAT64)(fft_ptr[2 * i]) * (*cos_ptr) + (FLOAT64)(fft_ptr[2 * i + 1]) * (*sin_ptr));
    tempi = 2 * ((FLOAT64)(fft_ptr[2 * i + 1]) * (*cos_ptr++) -
                 (FLOAT64)(fft_ptr[2 * i]) * (*sin_ptr++));

    /* each complex output feeds four spectral positions */
    ptr_out[2 * i] = -tempr;
    ptr_out[npoints / 2 - 1 - 2 * i] = tempi;
    ptr_out[npoints / 2 + 2 * i] = -tempi;
    ptr_out[npoints - 1 - 2 * i] = tempr;
  }
  if (tx_flag == 0) {
    for (i = 0; i < npoints; i += 2) {
      ptr_out[i] *= -1; /* reuse MDCT: flip signs at odd indices */
    }
  }
}
885
iusace_fft_based_mdct(FLOAT64 * ptr_in,FLOAT64 * ptr_out,WORD32 npoints,const WORD32 tx_flag,iusace_scratch_mem * pstr_scratch)886 IA_ERRORCODE iusace_fft_based_mdct(FLOAT64 *ptr_in, FLOAT64 *ptr_out, WORD32 npoints,
887 const WORD32 tx_flag, iusace_scratch_mem *pstr_scratch) {
888 FLOAT32 *ptr_scratch1 = pstr_scratch->p_fft_mdct_buf;
889 const FLOAT64 *cos_ptr = NULL;
890 const FLOAT64 *sin_ptr = NULL;
891 WORD32 nlength = npoints >> 1;
892 WORD32 n_total = npoints << 1;
893
894 memset(ptr_scratch1, 0, ((SIZE_T)n_total << 1) * sizeof(*ptr_scratch1));
895
896 switch (npoints) {
897 case (96):
898 cos_ptr = iexheaac_pre_post_twid_cos_192;
899 sin_ptr = iexheaac_pre_post_twid_sin_192;
900 break;
901 case (128):
902 cos_ptr = iusace_pre_post_twid_cos_256;
903 sin_ptr = iusace_pre_post_twid_sin_256;
904 break;
905 case (768):
906 cos_ptr = iexheaac_pre_post_twid_cos_1536;
907 sin_ptr = iexheaac_pre_post_twid_sin_1536;
908 break;
909 case (1024):
910 cos_ptr = iusace_pre_post_twid_cos_2048;
911 sin_ptr = iusace_pre_post_twid_sin_2048;
912 break;
913 default:
914 return IA_EXHEAACE_EXE_FATAL_USAC_INVALID_WINDOW_LENGTH;
915 }
916
917 /* pre-twiddle */
918 iusace_calc_pre_twid_enc(ptr_in, ptr_scratch1, npoints << 1, cos_ptr, sin_ptr, tx_flag);
919
920 /* complex FFT */
921 iusace_complex_fft(ptr_scratch1, nlength, pstr_scratch);
922
923 /* post-twiddle */
924 iusace_calc_post_twid_enc(ptr_out, ptr_scratch1, npoints << 1, cos_ptr, sin_ptr, tx_flag);
925
926 return IA_NO_ERROR;
927 }
928
iusace_complex_fft_2048(FLOAT32 * ptr_x,FLOAT32 * scratch_fft)929 VOID iusace_complex_fft_2048(FLOAT32 *ptr_x, FLOAT32 *scratch_fft) {
930 WORD32 i;
931 FLOAT32 re, im, c_v, s_v, tmp_re, tmp_im;
932 FLOAT32 *ptr_re, *ptr_im, *ptr_re_h, *ptr_im_h;
933 FLOAT32 *ptr_cos_val, *ptr_sin_val;
934 iusace_complex_fft_p2(ptr_x, 1024, scratch_fft);
935 iusace_complex_fft_p2(ptr_x + 2048, 1024, scratch_fft);
936
937 ptr_re = ptr_x;
938 ptr_im = ptr_x + 1;
939 ptr_re_h = ptr_x + 2048;
940 ptr_im_h = ptr_x + 2048 + 1;
941 ptr_cos_val = (FLOAT32 *)&iusace_twiddle_cos_2048[0];
942 ptr_sin_val = (FLOAT32 *)&iusace_twiddle_sin_2048[0];
943 for (i = 0; i < 1024; i++) {
944 re = *ptr_re_h;
945 im = *ptr_im_h;
946 c_v = ptr_cos_val[i];
947 s_v = ptr_sin_val[i];
948 tmp_re = (re * c_v) + (im * s_v);
949 tmp_im = -(re * s_v) + (im * c_v);
950 re = *ptr_re;
951 im = *ptr_im;
952
953 *ptr_re = re + tmp_re;
954 *ptr_im = im + tmp_im;
955 *ptr_re_h = re - tmp_re;
956 *ptr_im_h = im - tmp_im;
957
958 ptr_re += 2;
959 ptr_im += 2;
960 ptr_re_h += 2;
961 ptr_im_h += 2;
962 }
963 }
ixheaace_rad2_cplx_fft(FLOAT32 * ptr_real,FLOAT32 * ptr_imag,WORD32 n_points,FLOAT32 * ptr_scratch)964 static VOID ixheaace_rad2_cplx_fft(FLOAT32 *ptr_real, FLOAT32 *ptr_imag, WORD32 n_points,
965 FLOAT32 *ptr_scratch) {
966 WORD32 i, j, k, n_stages, h2;
967 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
968 WORD32 del, nodespacing, in_loop_cnt;
969 WORD32 not_power_4;
970 WORD32 dig_rev_shift;
971 WORD32 m_points = n_points;
972 FLOAT32 *ptr_x = ptr_scratch;
973 FLOAT32 *y = ptr_scratch + 2048;
974 FLOAT32 *ptr_y = y;
975 const FLOAT32 *ptr_w;
976
977 dig_rev_shift = ixheaac_norm32(m_points) + 1 - 16;
978 n_stages = 30 - ixheaac_norm32(m_points);
979 not_power_4 = n_stages & 1;
980
981 n_stages = n_stages >> 1;
982
983 ptr_w = ia_fft_twiddle_table_float;
984
985 for (i = 0; i < n_points; i++) {
986 ptr_x[2 * i] = ptr_real[i];
987 ptr_x[2 * i + 1] = ptr_imag[i];
988 }
989 dig_rev_shift = max(dig_rev_shift, 0);
990 for (i = 0; i < n_points; i += 4) {
991 FLOAT32 *inp = ptr_x;
992 FLOAT32 tmk;
993
994 DIG_REV(i, dig_rev_shift, h2);
995 if (not_power_4) {
996 h2 += 1;
997 h2 &= ~1;
998 }
999 inp += (h2);
1000
1001 x0r = *inp;
1002 x0i = *(inp + 1);
1003 inp += (n_points >> 1);
1004
1005 x1r = *inp;
1006 x1i = *(inp + 1);
1007 inp += (n_points >> 1);
1008
1009 x2r = *inp;
1010 x2i = *(inp + 1);
1011 inp += (n_points >> 1);
1012
1013 x3r = *inp;
1014 x3i = *(inp + 1);
1015
1016 x0r = ia_add_flt(x0r, x2r);
1017 x0i = ia_add_flt(x0i, x2i);
1018
1019 tmk = ia_sub_flt(x0r, x2r);
1020 x2r = ia_sub_flt(tmk, x2r);
1021 tmk = ia_sub_flt(x0i, x2i);
1022 x2i = ia_sub_flt(tmk, x2i);
1023
1024 x1r = ia_add_flt(x1r, x3r);
1025 x1i = ia_add_flt(x1i, x3i);
1026
1027 tmk = ia_sub_flt(x1r, x3r);
1028 x3r = ia_sub_flt(tmk, x3r);
1029 tmk = ia_sub_flt(x1i, x3i);
1030 x3i = ia_sub_flt(tmk, x3i);
1031
1032 x0r = ia_add_flt(x0r, x1r);
1033 x0i = ia_add_flt(x0i, x1i);
1034
1035 tmk = ia_sub_flt(x0r, x1r);
1036 x1r = ia_sub_flt(tmk, x1r);
1037 tmk = ia_sub_flt(x0i, x1i);
1038 x1i = ia_sub_flt(tmk, x1i);
1039
1040 x2r = ia_add_flt(x2r, x3i);
1041 x2i = ia_sub_flt(x2i, x3r);
1042
1043 tmk = ia_sub_flt(x2r, x3i);
1044 x3i = ia_sub_flt(tmk, x3i);
1045 tmk = ia_add_flt(x2i, x3r);
1046 x3r = ia_add_flt(tmk, x3r);
1047
1048 *ptr_y++ = x0r;
1049 *ptr_y++ = x0i;
1050 *ptr_y++ = x2r;
1051 *ptr_y++ = x2i;
1052 *ptr_y++ = x1r;
1053 *ptr_y++ = x1i;
1054 *ptr_y++ = x3i;
1055 *ptr_y++ = x3r;
1056 }
1057 ptr_y -= 2 * n_points;
1058 del = 4;
1059 nodespacing = 64;
1060 in_loop_cnt = n_points >> 4;
1061 for (i = n_stages - 1; i > 0; i--) {
1062 const FLOAT32 *twiddles = ptr_w;
1063 FLOAT32 *data = ptr_y;
1064 FLOAT32 w_1, w_2, w_3, w_4, w_5, w_6;
1065 WORD32 sec_loop_cnt;
1066
1067 for (k = in_loop_cnt; k != 0; k--) {
1068 x0r = (*data);
1069 x0i = (*(data + 1));
1070 data += ((SIZE_T)del << 1);
1071
1072 x1r = (*data);
1073 x1i = (*(data + 1));
1074 data += ((SIZE_T)del << 1);
1075
1076 x2r = (*data);
1077 x2i = (*(data + 1));
1078 data += ((SIZE_T)del << 1);
1079
1080 x3r = (*data);
1081 x3i = (*(data + 1));
1082 data -= 3 * (del << 1);
1083
1084 x0r = ia_add_flt(x0r, x2r);
1085 x0i = ia_add_flt(x0i, x2i);
1086 x2r = ia_msu_flt(x0r, x2r, 2);
1087 x2i = ia_msu_flt(x0i, x2i, 2);
1088 x1r = ia_add_flt(x1r, x3r);
1089 x1i = ia_add_flt(x1i, x3i);
1090 x3r = ia_msu_flt(x1r, x3r, 2);
1091 x3i = ia_msu_flt(x1i, x3i, 2);
1092
1093 x0r = ia_add_flt(x0r, x1r);
1094 x0i = ia_add_flt(x0i, x1i);
1095 x1r = ia_msu_flt(x0r, x1r, 2);
1096 x1i = ia_msu_flt(x0i, x1i, 2);
1097 x2r = ia_add_flt(x2r, x3i);
1098 x2i = ia_sub_flt(x2i, x3r);
1099 x3i = ia_msu_flt(x2r, x3i, 2);
1100 x3r = ia_mac_flt(x2i, x3r, 2);
1101
1102 *data = x0r;
1103 *(data + 1) = x0i;
1104 data += ((SIZE_T)del << 1);
1105
1106 *data = x2r;
1107 *(data + 1) = x2i;
1108 data += ((SIZE_T)del << 1);
1109
1110 *data = x1r;
1111 *(data + 1) = x1i;
1112 data += ((SIZE_T)del << 1);
1113
1114 *data = x3i;
1115 *(data + 1) = x3r;
1116 data += ((SIZE_T)del << 1);
1117 }
1118 data = ptr_y + 2;
1119
1120 sec_loop_cnt = (nodespacing * del);
1121 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
1122 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
1123 (sec_loop_cnt / 256);
1124
1125 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
1126 w_1 = *(twiddles + j);
1127 w_4 = *(twiddles + j + 257);
1128 w_2 = *(twiddles + ((SIZE_T)j << 1));
1129 w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
1130 w_3 = *(twiddles + j + ((SIZE_T)j << 1));
1131 w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
1132
1133 for (k = in_loop_cnt; k != 0; k--) {
1134 FLOAT32 tmp;
1135 /*x0 is loaded later to avoid register crunch*/
1136
1137 data += ((SIZE_T)del << 1);
1138
1139 x1r = *data;
1140 x1i = *(data + 1);
1141 data += ((SIZE_T)del << 1);
1142
1143 x2r = *data;
1144 x2i = *(data + 1);
1145 data += ((SIZE_T)del << 1);
1146
1147 x3r = *data;
1148 x3i = *(data + 1);
1149 data -= 3 * (del << 1);
1150
1151 tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
1152 x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
1153 x1r = tmp;
1154
1155 tmp = ia_sub_flt(ia_mul_flt(x2r, w_2), ia_mul_flt(x2i, w_5));
1156 x2i = ia_mac_flt(ia_mul_flt(x2r, w_5), x2i, w_2);
1157 x2r = tmp;
1158
1159 tmp = ia_sub_flt(ia_mul_flt(x3r, w_3), ia_mul_flt(x3i, w_6));
1160 x3i = ia_mac_flt(ia_mul_flt(x3r, w_6), x3i, w_3);
1161 x3r = tmp;
1162
1163 x0r = (*data);
1164 x0i = (*(data + 1));
1165
1166 x0r = ia_add_flt(x0r, (x2r));
1167 x0i = ia_add_flt(x0i, (x2i));
1168 x2r = ia_msu_flt(x0r, x2r, 2);
1169 x2i = ia_msu_flt(x0i, x2i, 2);
1170 x1r = ia_add_flt(x1r, x3r);
1171 x1i = ia_add_flt(x1i, x3i);
1172 x3r = ia_msu_flt(x1r, x3r, 2);
1173 x3i = ia_msu_flt(x1i, x3i, 2);
1174
1175 x0r = ia_add_flt(x0r, (x1r));
1176 x0i = ia_add_flt(x0i, (x1i));
1177 x1r = ia_msu_flt(x0r, x1r, 2);
1178 x1i = ia_msu_flt(x0i, x1i, 2);
1179 x2r = ia_add_flt(x2r, (x3i));
1180 x2i = ia_sub_flt(x2i, (x3r));
1181 x3i = ia_msu_flt(x2r, x3i, 2);
1182 x3r = ia_mac_flt(x2i, x3r, 2);
1183
1184 *data = x0r;
1185 *(data + 1) = x0i;
1186 data += ((SIZE_T)del << 1);
1187
1188 *data = x2r;
1189 *(data + 1) = x2i;
1190 data += ((SIZE_T)del << 1);
1191
1192 *data = x1r;
1193 *(data + 1) = x1i;
1194 data += ((SIZE_T)del << 1);
1195
1196 *data = x3i;
1197 *(data + 1) = x3r;
1198 data += ((SIZE_T)del << 1);
1199 }
1200 data -= 2 * n_points;
1201 data += 2;
1202 }
1203 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1204 w_1 = *(twiddles + j);
1205 w_4 = *(twiddles + j + 257);
1206 w_2 = *(twiddles + ((SIZE_T)j << 1));
1207 w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
1208 w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
1209 w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
1210
1211 for (k = in_loop_cnt; k != 0; k--) {
1212 FLOAT32 tmp;
1213 /*x0 is loaded later to avoid register crunch*/
1214
1215 data += ((SIZE_T)del << 1);
1216
1217 x1r = *data;
1218 x1i = *(data + 1);
1219 data += ((SIZE_T)del << 1);
1220
1221 x2r = *data;
1222 x2i = *(data + 1);
1223 data += ((SIZE_T)del << 1);
1224
1225 x3r = *data;
1226 x3i = *(data + 1);
1227 data -= 3 * (del << 1);
1228
1229 tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
1230 x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
1231 x1r = tmp;
1232
1233 tmp = ia_sub_flt(ia_mul_flt(x2r, w_2), ia_mul_flt(x2i, w_5));
1234 x2i = ia_mac_flt(ia_mul_flt(x2r, w_5), x2i, w_2);
1235 x2r = tmp;
1236
1237 tmp = ia_add_flt(ia_mul_flt(x3r, w_6), ia_mul_flt(x3i, w_3));
1238 x3i = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6));
1239 x3r = tmp;
1240
1241 x0r = (*data);
1242 x0i = (*(data + 1));
1243
1244 x0r = ia_add_flt(x0r, (x2r));
1245 x0i = ia_add_flt(x0i, (x2i));
1246 x2r = ia_msu_flt(x0r, x2r, 2);
1247 x2i = ia_msu_flt(x0i, x2i, 2);
1248 x1r = ia_add_flt(x1r, x3r);
1249 x1i = ia_add_flt(x1i, x3i);
1250 x3r = ia_msu_flt(x1r, x3r, 2);
1251 x3i = ia_msu_flt(x1i, x3i, 2);
1252
1253 x0r = ia_add_flt(x0r, (x1r));
1254 x0i = ia_add_flt(x0i, (x1i));
1255 x1r = ia_msu_flt(x0r, x1r, 2);
1256 x1i = ia_msu_flt(x0i, x1i, 2);
1257 x2r = ia_add_flt(x2r, (x3i));
1258 x2i = ia_sub_flt(x2i, (x3r));
1259 x3i = ia_msu_flt(x2r, x3i, 2);
1260 x3r = ia_mac_flt(x2i, x3r, 2);
1261
1262 *data = x0r;
1263 *(data + 1) = x0i;
1264 data += ((SIZE_T)del << 1);
1265
1266 *data = x2r;
1267 *(data + 1) = x2i;
1268 data += ((SIZE_T)del << 1);
1269
1270 *data = x1r;
1271 *(data + 1) = x1i;
1272 data += ((SIZE_T)del << 1);
1273
1274 *data = x3i;
1275 *(data + 1) = x3r;
1276 data += ((SIZE_T)del << 1);
1277 }
1278 data -= 2 * n_points;
1279 data += 2;
1280 }
1281 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1282 w_1 = *(twiddles + j);
1283 w_4 = *(twiddles + j + 257);
1284 w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
1285 w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
1286 w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
1287 w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
1288
1289 for (k = in_loop_cnt; k != 0; k--) {
1290 FLOAT32 tmp;
1291 /*x0 is loaded later to avoid register crunch*/
1292
1293 data += ((SIZE_T)del << 1);
1294
1295 x1r = *data;
1296 x1i = *(data + 1);
1297 data += ((SIZE_T)del << 1);
1298
1299 x2r = *data;
1300 x2i = *(data + 1);
1301 data += ((SIZE_T)del << 1);
1302
1303 x3r = *data;
1304 x3i = *(data + 1);
1305 data -= 3 * (del << 1);
1306
1307 tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
1308 x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
1309 x1r = tmp;
1310
1311 tmp = ia_add_flt(ia_mul_flt(x2r, w_5), ia_mul_flt(x2i, w_2));
1312 x2i = ia_add_flt(ia_negate_flt(ia_mul_flt(x2r, w_2)), ia_mul_flt(x2i, w_5));
1313 x2r = tmp;
1314
1315 tmp = ia_add_flt(ia_mul_flt(x3r, w_6), ia_mul_flt(x3i, w_3));
1316 x3i = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6));
1317 x3r = tmp;
1318
1319 x0r = (*data);
1320 x0i = (*(data + 1));
1321
1322 x0r = ia_add_flt(x0r, (x2r));
1323 x0i = ia_add_flt(x0i, (x2i));
1324 x2r = ia_msu_flt(x0r, x2r, 2);
1325 x2i = ia_msu_flt(x0i, x2i, 2);
1326 x1r = ia_add_flt(x1r, x3r);
1327 x1i = ia_add_flt(x1i, x3i);
1328 x3r = ia_msu_flt(x1r, x3r, 2);
1329 x3i = ia_msu_flt(x1i, x3i, 2);
1330
1331 x0r = ia_add_flt(x0r, (x1r));
1332 x0i = ia_add_flt(x0i, (x1i));
1333 x1r = ia_msu_flt(x0r, x1r, 2);
1334 x1i = ia_msu_flt(x0i, x1i, 2);
1335 x2r = ia_add_flt(x2r, (x3i));
1336 x2i = ia_sub_flt(x2i, (x3r));
1337 x3i = ia_msu_flt(x2r, x3i, 2);
1338 x3r = ia_mac_flt(x2i, x3r, 2);
1339
1340 *data = x0r;
1341 *(data + 1) = x0i;
1342 data += ((SIZE_T)del << 1);
1343
1344 *data = x2r;
1345 *(data + 1) = x2i;
1346 data += ((SIZE_T)del << 1);
1347
1348 *data = x1r;
1349 *(data + 1) = x1i;
1350 data += ((SIZE_T)del << 1);
1351
1352 *data = x3i;
1353 *(data + 1) = x3r;
1354 data += ((SIZE_T)del << 1);
1355 }
1356 data -= 2 * n_points;
1357 data += 2;
1358 }
1359 for (; j < nodespacing * del; j += nodespacing) {
1360 w_1 = *(twiddles + j);
1361 w_4 = *(twiddles + j + 257);
1362 w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
1363 w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
1364 w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
1365 w_6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
1366
1367 for (k = in_loop_cnt; k != 0; k--) {
1368 FLOAT32 tmp;
1369 /*x0 is loaded later to avoid register crunch*/
1370
1371 data += ((SIZE_T)del << 1);
1372
1373 x1r = *data;
1374 x1i = *(data + 1);
1375 data += ((SIZE_T)del << 1);
1376
1377 x2r = *data;
1378 x2i = *(data + 1);
1379 data += ((SIZE_T)del << 1);
1380
1381 x3r = *data;
1382 x3i = *(data + 1);
1383 data -= 3 * (del << 1);
1384
1385 tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
1386 x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
1387 x1r = tmp;
1388
1389 tmp = ia_add_flt(ia_mul_flt(x2r, w_5), ia_mul_flt(x2i, w_2));
1390 x2i = ia_add_flt(ia_negate_flt(ia_mul_flt(x2r, w_2)), ia_mul_flt(x2i, w_5));
1391 x2r = tmp;
1392
1393 tmp = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6));
1394 x3i = ia_mac_flt(ia_mul_flt(x3r, w_6), x3i, w_3);
1395 x3r = tmp;
1396
1397 x0r = (*data);
1398 x0i = (*(data + 1));
1399
1400 x0r = ia_add_flt(x0r, (x2r));
1401 x0i = ia_add_flt(x0i, (x2i));
1402 x2r = ia_msu_flt(x0r, x2r, 2);
1403 x2i = ia_msu_flt(x0i, x2i, 2);
1404 x1r = ia_add_flt(x1r, x3r);
1405 x1i = ia_sub_flt(x1i, x3i);
1406 x3r = ia_msu_flt(x1r, x3r, 2);
1407 x3i = ia_mac_flt(x1i, x3i, 2);
1408
1409 x0r = ia_add_flt(x0r, (x1r));
1410 x0i = ia_add_flt(x0i, (x1i));
1411 x1r = ia_msu_flt(x0r, x1r, 2);
1412 x1i = ia_msu_flt(x0i, x1i, 2);
1413 x2r = ia_add_flt(x2r, (x3i));
1414 x2i = ia_sub_flt(x2i, (x3r));
1415 x3i = ia_msu_flt(x2r, x3i, 2);
1416 x3r = ia_mac_flt(x2i, x3r, 2);
1417
1418 *data = x0r;
1419 *(data + 1) = x0i;
1420 data += ((SIZE_T)del << 1);
1421
1422 *data = x2r;
1423 *(data + 1) = x2i;
1424 data += ((SIZE_T)del << 1);
1425
1426 *data = x1r;
1427 *(data + 1) = x1i;
1428 data += ((SIZE_T)del << 1);
1429
1430 *data = x3i;
1431 *(data + 1) = x3r;
1432 data += ((SIZE_T)del << 1);
1433 }
1434 data -= 2 * n_points;
1435 data += 2;
1436 }
1437 nodespacing >>= 2;
1438 del <<= 2;
1439 in_loop_cnt >>= 2;
1440 }
1441 if (not_power_4) {
1442 const FLOAT32 *twiddles = ptr_w;
1443 nodespacing <<= 1;
1444
1445 for (j = del / 2; j != 0; j--) {
1446 FLOAT32 w_1 = *twiddles;
1447 FLOAT32 w_4 = *(twiddles + 257);
1448 FLOAT32 tmp;
1449 twiddles += nodespacing;
1450
1451 x0r = *ptr_y;
1452 x0i = *(ptr_y + 1);
1453 ptr_y += ((SIZE_T)del << 1);
1454
1455 x1r = *ptr_y;
1456 x1i = *(ptr_y + 1);
1457
1458 tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
1459 x1i = (FLOAT32)ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
1460 x1r = tmp;
1461
1462 *ptr_y = ia_sub_flt((x0r), (x1r));
1463 *(ptr_y + 1) = ia_sub_flt((x0i), (x1i));
1464 ptr_y -= ((SIZE_T)del << 1);
1465
1466 *ptr_y = ia_add_flt((x0r), (x1r));
1467 *(ptr_y + 1) = ia_add_flt((x0i), (x1i));
1468 ptr_y += 2;
1469 }
1470 twiddles = ptr_w;
1471 for (j = del / 2; j != 0; j--) {
1472 FLOAT32 w_1 = *twiddles;
1473 FLOAT32 w_4 = *(twiddles + 257);
1474 FLOAT32 tmp;
1475 twiddles += nodespacing;
1476
1477 x0r = *ptr_y;
1478 x0i = *(ptr_y + 1);
1479 ptr_y += ((SIZE_T)del << 1);
1480
1481 x1r = *ptr_y;
1482 x1i = *(ptr_y + 1);
1483
1484 tmp = ia_add_flt(ia_mul_flt(x1r, w_4), ia_mul_flt(x1i, w_1));
1485 x1i = ia_add_flt(ia_negate_flt(ia_mul_flt(x1r, w_1)), ia_mul_flt(x1i, w_4));
1486 x1r = tmp;
1487
1488 *ptr_y = ia_sub_flt((x0r), (x1r));
1489 *(ptr_y + 1) = ia_sub_flt((x0i), (x1i));
1490 ptr_y -= ((SIZE_T)del << 1);
1491
1492 *ptr_y = ia_add_flt((x0r), (x1r));
1493 *(ptr_y + 1) = ia_add_flt((x0i), (x1i));
1494 ptr_y += 2;
1495 }
1496 }
1497
1498 for (i = 0; i < n_points; i++) {
1499 ptr_real[i] = y[2 * i];
1500 ptr_imag[i] = y[2 * i + 1];
1501 }
1502 }
ixheaace_cplx_fft_4(FLOAT32 * x_r,FLOAT32 * x_i)1503 static VOID ixheaace_cplx_fft_4(FLOAT32 *x_r, FLOAT32 *x_i) {
1504 FLOAT32 x_0, x_1, x_2, x_3;
1505 FLOAT32 x_4, x_5, x_6, x_7;
1506 FLOAT32 x0r, x1r, x2r, x3r;
1507 FLOAT32 x0i, x1i, x2i, x3i;
1508
1509 // 4 Point FFT
1510 x_0 = x_r[0];
1511 x_1 = x_i[0];
1512 x_2 = x_r[1];
1513 x_3 = x_i[1];
1514 x_4 = x_r[2];
1515 x_5 = x_i[2];
1516 x_6 = x_r[3];
1517 x_7 = x_i[3];
1518
1519 x0r = ia_add_flt(x_0, x_4);
1520 x0i = ia_add_flt(x_1, x_5);
1521 x2r = ia_sub_flt(x_0, x_4);
1522 x2i = ia_sub_flt(x_1, x_5);
1523 x1r = ia_add_flt(x_2, x_6);
1524 x1i = ia_add_flt(x_3, x_7);
1525 x3r = ia_sub_flt(x_2, x_6);
1526 x3i = ia_sub_flt(x_3, x_7);
1527
1528 x_r[0] = ia_add_flt(x0r, x1r);
1529 x_i[0] = ia_add_flt(x0i, x1i);
1530 x_r[2] = ia_sub_flt(x0r, x1r);
1531 x_i[2] = ia_sub_flt(x0i, x1i);
1532 x_r[1] = ia_add_flt(x2r, x3i);
1533 x_i[1] = ia_sub_flt(x2i, x3r);
1534 x_r[3] = ia_sub_flt(x2r, x3i);
1535 x_i[3] = ia_add_flt(x2i, x3r);
1536 return;
1537 }
/* 4096-point FFT via a mixed-radix (Cooley-Tukey) decomposition:
 * 4096 = dim1 * dim2 = 1024 * 4. Stage 1 runs dim2 length-dim1 radix-2
 * FFTs on de-interleaved columns, stage 2 applies inter-stage twiddles,
 * stage 3 runs dim1 4-point FFTs, and the result is transposed back
 * into ptr_x_r / ptr_x_i.
 *
 * NOTE(review): the imaginary input is forced to zero in stage 1
 * (ptr_data_i[j] = 0), so ptr_x_i is never read on entry — the input is
 * effectively treated as real-valued. Confirm this is intended by all
 * callers before relying on complex input.
 *
 * ptr_scratch_buf: work area; the first 2 * fft_len floats hold the
 * stage-1 interleaved real/imag rows, and the region starting at
 * ptr_scratch_buf[2 * fft_len] (ptr_fft_interim_buf) holds the stage-2/3
 * data and is also passed as scratch to ixheaace_rad2_cplx_fft
 * (presumably at least 4 * fft_len floats total — verify against callers).
 */
VOID iusace_complex_fft_4096(FLOAT32 *ptr_x_r, FLOAT32 *ptr_x_i, FLOAT32 *ptr_scratch_buf) {
  FLOAT32 *ptr_data_r;
  FLOAT32 *ptr_data_i;
  WORD32 fft_len = 4096;
  /* Second half of the scratch area: stage-2/3 work buffer. */
  FLOAT32 *ptr_fft_interim_buf = &ptr_scratch_buf[2 * fft_len];
  WORD32 i, j;
  WORD32 dim2 = fft_len >> 10; /* 4 */
  WORD32 dim1 = fft_len / dim2; /* 1024 */
  WORD32 fac = 4; /* stride into the shared mixed-radix twiddle ROM tables */

  /* Stage 1: de-interleave input into dim2 columns (column i takes every
   * dim2-th sample) and FFT each column.
   * Scratch layout: row (2*i) holds reals, row (2*i+1) holds imaginaries. */
  for (i = 0; i < dim2; i++) {
    ptr_data_r = &ptr_scratch_buf[(2 * i + 0) * dim1];
    ptr_data_i = &ptr_scratch_buf[(2 * i + 1) * dim1];
    for (j = 0; j < dim1; j++) {
      ptr_data_r[j] = ptr_x_r[(dim2 * j + i)];
      /* Imaginary input discarded — see NOTE(review) above. */
      ptr_data_i[j] = 0;
    }
    ixheaace_rad2_cplx_fft(ptr_data_r, ptr_data_i, dim1, ptr_fft_interim_buf);
  }
  /* Stage 2: multiply column-FFT outputs by the inter-stage twiddle
   * factors and transpose into ptr_fft_interim_buf (same interleaved
   * row layout, now dim2-wide rows). Both pointers alias the start of
   * the scratch buffer; the +0/+1 row offsets select real vs imag. */
  ptr_data_r = &ptr_scratch_buf[0];
  ptr_data_i = &ptr_scratch_buf[0];
  for (i = 0; i < dim1; i++) {
    FLOAT32 *ptr_cos_val = (FLOAT32 *)&ia_mixed_rad_twiddle_cos[i * dim2 * fac];
    FLOAT32 *ptr_sin_val = (FLOAT32 *)&ia_mixed_rad_twiddle_sin[i * dim2 * fac];
    for (j = 0; j < dim2; j++) {
      FLOAT32 real = ptr_data_r[(2 * j + 0) * dim1 + i];
      FLOAT32 imag = ptr_data_i[(2 * j + 1) * dim1 + i];
      FLOAT32 cos_val = ptr_cos_val[j * fac];
      FLOAT32 sin_val = ptr_sin_val[j * fac];
      /* Complex multiply by conj(twiddle): (real + j*imag) * (cos - j*sin). */
      FLOAT32 temp_real = (FLOAT32)(real * cos_val + imag * sin_val);
      FLOAT32 temp_imag = (FLOAT32)(imag * cos_val - real * sin_val);
      ptr_fft_interim_buf[(2 * i + 0) * dim2 + j] = temp_real;
      ptr_fft_interim_buf[(2 * i + 1) * dim2 + j] = temp_imag;
    }
  }
  /* Stage 3: 4-point FFT across each twiddled row. */
  for (i = 0; i < dim1; i++) {
    ptr_data_r = &ptr_fft_interim_buf[(2 * i + 0) * dim2];
    ptr_data_i = &ptr_fft_interim_buf[(2 * i + 1) * dim2];
    ixheaace_cplx_fft_4(ptr_data_r, ptr_data_i);
  }
  /* Final transpose: write the interleaved interim buffer back out to
   * the caller's split real/imag arrays in natural bin order. */
  ptr_data_r = &ptr_fft_interim_buf[0];
  ptr_data_i = &ptr_fft_interim_buf[0];
  for (i = 0; i < dim1; i++) {
    for (j = 0; j < dim2; j++) {
      ptr_x_r[(j * dim1 + i)] = ptr_data_r[(2 * i + 0) * dim2 + j];
      ptr_x_i[(j * dim1 + i)] = ptr_data_i[(2 * i + 1) * dim2 + j];
    }
  }
}