1 /******************************************************************************
2 * *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include "ixheaac_type_def.h"
24 #include "ixheaac_constants.h"
25 #include "ixheaac_basic_ops32.h"
26
27 #define PLATFORM_INLINE __inline
28
/*
 * DIG_REV(i, m, j): reverses the order of the 2-bit groups inside each 16-bit
 * half of `i` (2-bit pairs are swapped within nibbles, nibbles within bytes,
 * bytes within half-words), shifts the result right by `m` bits and stores it
 * in `j`.
 *
 *   i - value to reverse; converted to unsigned, evaluated once.
 *   m - right-shift applied to the reversed value; must be in 0..31.
 *   j - lvalue receiving the result.
 */
#define DIG_REV(i, m, j)                                                  \
  do {                                                                    \
    unsigned dig_rev_val_ = (unsigned)(i);                                \
    dig_rev_val_ = ((dig_rev_val_ & 0x33333333u) << 2) |                  \
                   ((dig_rev_val_ & ~0x33333333u) >> 2);                  \
    dig_rev_val_ = ((dig_rev_val_ & 0x0F0F0F0Fu) << 4) |                  \
                   ((dig_rev_val_ & ~0x0F0F0F0Fu) >> 4);                  \
    dig_rev_val_ = ((dig_rev_val_ & 0x00FF00FFu) << 8) |                  \
                   ((dig_rev_val_ & ~0x00FF00FFu) >> 8);                  \
    (j) = dig_rev_val_ >> (m);                                            \
  } while (0)
37
38 extern const FLOAT32 ixheaac_twiddle_table_fft_float[514];
39 extern const FLOAT32 ixheaac_twidle_tbl_48[64];
40 extern const FLOAT32 ixheaac_twidle_tbl_24[32];
41
ixheaac_real_synth_fft_p2(FLOAT32 * ptr_x,FLOAT32 * ptr_y,WORD32 npoints)42 void ixheaac_real_synth_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) {
43 WORD32 i, j, k, n_stages, h2;
44 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
45 WORD32 del, nodespacing, in_loop_cnt;
46 WORD32 not_power_4;
47 WORD32 dig_rev_shift;
48 const FLOAT32 *ptr_w;
49
50 dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
51 n_stages = 30 - ixheaac_norm32(npoints);
52 not_power_4 = n_stages & 1;
53
54 n_stages = n_stages >> 1;
55
56 ptr_w = ixheaac_twiddle_table_fft_float;
57
58 for (i = 0; i < npoints; i += 4) {
59 FLOAT32 *inp = ptr_x;
60
61 DIG_REV(i, dig_rev_shift, h2);
62 if (not_power_4) {
63 h2 += 1;
64 h2 &= ~1;
65 }
66 inp += (h2 >> 1);
67
68 x0r = *inp;
69 inp += (npoints >> 2);
70
71 x1r = *inp;
72 inp += (npoints >> 2);
73
74 x2r = *inp;
75 inp += (npoints >> 2);
76
77 x3r = *inp;
78
79 x0r = x0r + x2r;
80 x2r = x0r - (x2r * 2);
81 x1r = x1r + x3r;
82 x3r = x1r - (x3r * 2);
83 x0r = x0r + x1r;
84 x1r = x0r - (x1r * 2);
85
86 *ptr_y++ = x0r;
87 *ptr_y++ = 0;
88 *ptr_y++ = x2r;
89 *ptr_y++ = x3r;
90 *ptr_y++ = x1r;
91 *ptr_y++ = 0;
92 *ptr_y++ = x2r;
93 *ptr_y++ = -x3r;
94 }
95 ptr_y -= 2 * npoints;
96 del = 4;
97 nodespacing = 64;
98 in_loop_cnt = npoints >> 4;
99 for (i = n_stages - 1; i > 0; i--) {
100 const FLOAT32 *twiddles = ptr_w;
101 FLOAT32 *data = ptr_y;
102 FLOAT32 W1, W2, W3, W4, W5, W6;
103 WORD32 sec_loop_cnt;
104
105 for (k = in_loop_cnt; k != 0; k--) {
106 x0r = (*data);
107 x0i = (*(data + 1));
108 data += ((SIZE_T)del << 1);
109
110 x1r = (*data);
111 x1i = (*(data + 1));
112 data += ((SIZE_T)del << 1);
113
114 x2r = (*data);
115 x2i = (*(data + 1));
116 data += ((SIZE_T)del << 1);
117
118 x3r = (*data);
119 x3i = (*(data + 1));
120 data -= 3 * ((SIZE_T)del << 1);
121
122 x0r = x0r + x2r;
123 x0i = x0i + x2i;
124 x2r = x0r - (x2r * 2);
125 x2i = x0i - (x2i * 2);
126 x1r = x1r + x3r;
127 x1i = x1i + x3i;
128 x3r = x1r - (x3r * 2);
129 x3i = x1i - (x3i * 2);
130
131 x0r = x0r + x1r;
132 x0i = x0i + x1i;
133 x1r = x0r - (x1r * 2);
134 x1i = x0i - (x1i * 2);
135 x2r = x2r - x3i;
136 x2i = x2i + x3r;
137 x3i = x2r + (x3i * 2);
138 x3r = x2i - (x3r * 2);
139
140 *data = x0r;
141 *(data + 1) = x0i;
142 data += ((SIZE_T)del << 1);
143
144 *data = x2r;
145 *(data + 1) = x2i;
146 data += ((SIZE_T)del << 1);
147
148 *data = x1r;
149 *(data + 1) = x1i;
150 data += ((SIZE_T)del << 1);
151
152 *data = x3i;
153 *(data + 1) = x3r;
154 data += ((SIZE_T)del << 1);
155 }
156 data = ptr_y + 2;
157
158 sec_loop_cnt = (nodespacing * del);
159 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
160 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
161 (sec_loop_cnt / 256);
162
163 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
164 W1 = *(twiddles + j);
165 W4 = *(twiddles + j + 257);
166 W2 = *(twiddles + ((SIZE_T)j << 1));
167 W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
168 W3 = *(twiddles + j + ((SIZE_T)j << 1));
169 W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
170
171 for (k = in_loop_cnt; k != 0; k--) {
172 FLOAT32 tmp;
173 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
174
175 data += ((SIZE_T)del << 1);
176
177 x1r = *data;
178 x1i = *(data + 1);
179 data += ((SIZE_T)del << 1);
180
181 x2r = *data;
182 x2i = *(data + 1);
183 data += ((SIZE_T)del << 1);
184
185 x3r = *data;
186 x3i = *(data + 1);
187 data -= 3 * ((SIZE_T)del << 1);
188
189 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
190 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
191 x1r = tmp;
192
193 tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
194 x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
195 x2r = tmp;
196
197 tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
198 x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
199 x3r = tmp;
200
201 x0r = (*data);
202 x0i = (*(data + 1));
203
204 x0r = x0r + (x2r);
205 x0i = x0i + (x2i);
206 x2r = x0r - (x2r * 2);
207 x2i = x0i - (x2i * 2);
208 x1r = x1r + x3r;
209 x1i = x1i + x3i;
210 x3r = x1r - (x3r * 2);
211 x3i = x1i - (x3i * 2);
212
213 x0r = x0r + (x1r);
214 x0i = x0i + (x1i);
215 x1r = x0r - (x1r * 2);
216 x1i = x0i - (x1i * 2);
217 x2r = x2r - (x3i);
218 x2i = x2i + (x3r);
219 x3i = x2r + (x3i * 2);
220 x3r = x2i - (x3r * 2);
221
222 *data = x0r;
223 *(data + 1) = x0i;
224 data += ((SIZE_T)del << 1);
225
226 *data = x2r;
227 *(data + 1) = x2i;
228 data += ((SIZE_T)del << 1);
229
230 *data = x1r;
231 *(data + 1) = x1i;
232 data += ((SIZE_T)del << 1);
233
234 *data = x3i;
235 *(data + 1) = x3r;
236 data += ((SIZE_T)del << 1);
237 }
238 data -= 2 * npoints;
239 data += 2;
240 }
241 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
242 W1 = *(twiddles + j);
243 W4 = *(twiddles + j + 257);
244 W2 = *(twiddles + ((SIZE_T)j << 1));
245 W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
246 W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
247 W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
248
249 for (k = in_loop_cnt; k != 0; k--) {
250 FLOAT32 tmp;
251 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
252
253 data += ((SIZE_T)del << 1);
254
255 x1r = *data;
256 x1i = *(data + 1);
257 data += ((SIZE_T)del << 1);
258
259 x2r = *data;
260 x2i = *(data + 1);
261 data += ((SIZE_T)del << 1);
262
263 x3r = *data;
264 x3i = *(data + 1);
265 data -= 3 * ((SIZE_T)del << 1);
266
267 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
268 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
269 x1r = tmp;
270
271 tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
272 x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
273 x2r = tmp;
274
275 tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
276 x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
277 x3r = tmp;
278
279 x0r = (*data);
280 x0i = (*(data + 1));
281
282 x0r = x0r + (x2r);
283 x0i = x0i + (x2i);
284 x2r = x0r - (x2r * 2);
285 x2i = x0i - (x2i * 2);
286 x1r = x1r + x3r;
287 x1i = x1i + x3i;
288 x3r = x1r - (x3r * 2);
289 x3i = x1i - (x3i * 2);
290
291 x0r = x0r + (x1r);
292 x0i = x0i + (x1i);
293 x1r = x0r - (x1r * 2);
294 x1i = x0i - (x1i * 2);
295 x2r = x2r - (x3i);
296 x2i = x2i + (x3r);
297 x3i = x2r + (x3i * 2);
298 x3r = x2i - (x3r * 2);
299
300 *data = x0r;
301 *(data + 1) = x0i;
302 data += ((SIZE_T)del << 1);
303
304 *data = x2r;
305 *(data + 1) = x2i;
306 data += ((SIZE_T)del << 1);
307
308 *data = x1r;
309 *(data + 1) = x1i;
310 data += ((SIZE_T)del << 1);
311
312 *data = x3i;
313 *(data + 1) = x3r;
314 data += ((SIZE_T)del << 1);
315 }
316 data -= 2 * npoints;
317 data += 2;
318 }
319 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
320 W1 = *(twiddles + j);
321 W4 = *(twiddles + j + 257);
322 W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
323 W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
324 W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
325 W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
326
327 for (k = in_loop_cnt; k != 0; k--) {
328 FLOAT32 tmp;
329 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
330
331 data += ((SIZE_T)del << 1);
332
333 x1r = *data;
334 x1i = *(data + 1);
335 data += ((SIZE_T)del << 1);
336
337 x2r = *data;
338 x2i = *(data + 1);
339 data += ((SIZE_T)del << 1);
340
341 x3r = *data;
342 x3i = *(data + 1);
343 data -= 3 * ((SIZE_T)del << 1);
344
345 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
346 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
347 x1r = tmp;
348
349 tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
350 x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
351 x2r = tmp;
352
353 tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
354 x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
355 x3r = tmp;
356
357 x0r = (*data);
358 x0i = (*(data + 1));
359
360 x0r = x0r + (x2r);
361 x0i = x0i + (x2i);
362 x2r = x0r - (x2r * 2);
363 x2i = x0i - (x2i * 2);
364 x1r = x1r + x3r;
365 x1i = x1i + x3i;
366 x3r = x1r - (x3r * 2);
367 x3i = x1i - (x3i * 2);
368
369 x0r = x0r + (x1r);
370 x0i = x0i + (x1i);
371 x1r = x0r - (x1r * 2);
372 x1i = x0i - (x1i * 2);
373 x2r = x2r - (x3i);
374 x2i = x2i + (x3r);
375 x3i = x2r + (x3i * 2);
376 x3r = x2i - (x3r * 2);
377
378 *data = x0r;
379 *(data + 1) = x0i;
380 data += ((SIZE_T)del << 1);
381
382 *data = x2r;
383 *(data + 1) = x2i;
384 data += ((SIZE_T)del << 1);
385
386 *data = x1r;
387 *(data + 1) = x1i;
388 data += ((SIZE_T)del << 1);
389
390 *data = x3i;
391 *(data + 1) = x3r;
392 data += ((SIZE_T)del << 1);
393 }
394 data -= 2 * npoints;
395 data += 2;
396 }
397 for (; j < nodespacing * del; j += nodespacing) {
398 W1 = *(twiddles + j);
399 W4 = *(twiddles + j + 257);
400 W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
401 W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
402 W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
403 W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
404
405 for (k = in_loop_cnt; k != 0; k--) {
406 FLOAT32 tmp;
407 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
408
409 data += ((SIZE_T)del << 1);
410
411 x1r = *data;
412 x1i = *(data + 1);
413 data += ((SIZE_T)del << 1);
414
415 x2r = *data;
416 x2i = *(data + 1);
417 data += ((SIZE_T)del << 1);
418
419 x3r = *data;
420 x3i = *(data + 1);
421 data -= 3 * ((SIZE_T)del << 1);
422
423 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
424 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
425 x1r = tmp;
426
427 tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
428 x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
429 x2r = tmp;
430
431 tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6));
432 x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
433 x3r = tmp;
434
435 x0r = (*data);
436 x0i = (*(data + 1));
437
438 x0r = x0r + (x2r);
439 x0i = x0i + (x2i);
440 x2r = x0r - (x2r * 2);
441 x2i = x0i - (x2i * 2);
442 x1r = x1r + x3r;
443 x1i = x1i - x3i;
444 x3r = x1r - (x3r * 2);
445 x3i = x1i + (x3i * 2);
446
447 x0r = x0r + (x1r);
448 x0i = x0i + (x1i);
449 x1r = x0r - (x1r * 2);
450 x1i = x0i - (x1i * 2);
451 x2r = x2r - (x3i);
452 x2i = x2i + (x3r);
453 x3i = x2r + (x3i * 2);
454 x3r = x2i - (x3r * 2);
455
456 *data = x0r;
457 *(data + 1) = x0i;
458 data += ((SIZE_T)del << 1);
459
460 *data = x2r;
461 *(data + 1) = x2i;
462 data += ((SIZE_T)del << 1);
463
464 *data = x1r;
465 *(data + 1) = x1i;
466 data += ((SIZE_T)del << 1);
467
468 *data = x3i;
469 *(data + 1) = x3r;
470 data += ((SIZE_T)del << 1);
471 }
472 data -= 2 * npoints;
473 data += 2;
474 }
475 nodespacing >>= 2;
476 del <<= 2;
477 in_loop_cnt >>= 2;
478 }
479
480 if (not_power_4) {
481 const FLOAT32 *twiddles = ptr_w;
482 nodespacing <<= 1;
483
484 for (j = del / 2; j != 0; j--) {
485 FLOAT32 W1 = *twiddles;
486 FLOAT32 W4 = *(twiddles + 257);
487 FLOAT32 tmp;
488 twiddles += nodespacing;
489
490 x0r = *ptr_y;
491 x0i = *(ptr_y + 1);
492 ptr_y += ((SIZE_T)del << 1);
493
494 x1r = *ptr_y;
495 x1i = *(ptr_y + 1);
496
497 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
498 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
499 x1r = tmp;
500
501 *ptr_y = (x0r) - (x1r);
502 *(ptr_y + 1) = (x0i) - (x1i);
503 ptr_y -= ((SIZE_T)del << 1);
504
505 *ptr_y = (x0r) + (x1r);
506 *(ptr_y + 1) = (x0i) + (x1i);
507 ptr_y += 2;
508 }
509 twiddles = ptr_w;
510 for (j = del / 2; j != 0; j--) {
511 FLOAT32 W1 = *twiddles;
512 FLOAT32 W4 = *(twiddles + 257);
513 FLOAT32 tmp;
514 twiddles += nodespacing;
515
516 x0r = *ptr_y;
517 x0i = *(ptr_y + 1);
518 ptr_y += ((SIZE_T)del << 1);
519
520 x1r = *ptr_y;
521 x1i = *(ptr_y + 1);
522 tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1));
523 x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
524 x1r = tmp;
525
526 *ptr_y = (x0r) - (x1r);
527 *(ptr_y + 1) = (x0i) - (x1i);
528 ptr_y -= ((SIZE_T)del << 1);
529
530 *ptr_y = (x0r) + (x1r);
531 *(ptr_y + 1) = (x0i) + (x1i);
532 ptr_y += 2;
533 }
534 }
535 }
536
ixheaac_cmplx_anal_fft_p2(FLOAT32 * ptr_x,FLOAT32 * ptr_y,WORD32 npoints)537 void ixheaac_cmplx_anal_fft_p2(FLOAT32 *ptr_x, FLOAT32 *ptr_y, WORD32 npoints) {
538 WORD32 i, j, k, n_stages, h2;
539 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
540 WORD32 del, nodespacing, in_loop_cnt;
541 WORD32 not_power_4;
542 WORD32 dig_rev_shift;
543 const FLOAT32 *ptr_w;
544
545 dig_rev_shift = ixheaac_norm32(npoints) + 1 - 16;
546 n_stages = 30 - ixheaac_norm32(npoints);
547 not_power_4 = n_stages & 1;
548
549 n_stages = n_stages >> 1;
550
551 ptr_w = ixheaac_twiddle_table_fft_float;
552
553 for (i = 0; i < npoints; i += 4) {
554 FLOAT32 *inp = ptr_x;
555
556 DIG_REV(i, dig_rev_shift, h2);
557 if (not_power_4) {
558 h2 += 1;
559 h2 &= ~1;
560 }
561 inp += (h2);
562
563 x0r = *inp;
564 x0i = *(inp + 1);
565 inp += (npoints >> 1);
566
567 x1r = *inp;
568 x1i = *(inp + 1);
569 inp += (npoints >> 1);
570
571 x2r = *inp;
572 x2i = *(inp + 1);
573 inp += (npoints >> 1);
574
575 x3r = *inp;
576 x3i = *(inp + 1);
577
578 x0r = x0r + x2r;
579 x0i = x0i + x2i;
580 x2r = x0r - (x2r * 2);
581 x2i = x0i - (x2i * 2);
582 x1r = x1r + x3r;
583 x1i = x1i + x3i;
584 x3r = x1r - (x3r * 2);
585 x3i = x1i - (x3i * 2);
586
587 x0r = x0r + x1r;
588 x0i = x0i + x1i;
589 x1r = x0r - (x1r * 2);
590 x1i = x0i - (x1i * 2);
591 x2r = x2r - x3i;
592 x2i = x2i + x3r;
593 x3i = x2r + (x3i * 2);
594 x3r = x2i - (x3r * 2);
595
596 *ptr_y++ = x0r;
597 *ptr_y++ = x0i;
598 *ptr_y++ = x2r;
599 *ptr_y++ = x2i;
600 *ptr_y++ = x1r;
601 *ptr_y++ = x1i;
602 *ptr_y++ = x3i;
603 *ptr_y++ = x3r;
604 }
605 ptr_y -= 2 * npoints;
606 del = 4;
607 nodespacing = 64;
608 in_loop_cnt = npoints >> 4;
609 for (i = n_stages - 1; i > 0; i--) {
610 const FLOAT32 *twiddles = ptr_w;
611 FLOAT32 *data = ptr_y;
612 FLOAT32 W1, W2, W3, W4, W5, W6;
613 WORD32 sec_loop_cnt;
614
615 for (k = in_loop_cnt; k != 0; k--) {
616 x0r = (*data);
617 x0i = (*(data + 1));
618 data += ((SIZE_T)del << 1);
619
620 x1r = (*data);
621 x1i = (*(data + 1));
622 data += ((SIZE_T)del << 1);
623
624 x2r = (*data);
625 x2i = (*(data + 1));
626 data += ((SIZE_T)del << 1);
627
628 x3r = (*data);
629 x3i = (*(data + 1));
630 data -= 3 * ((SIZE_T)del << 1);
631
632 x0r = x0r + x2r;
633 x0i = x0i + x2i;
634 x2r = x0r - (x2r * 2);
635 x2i = x0i - (x2i * 2);
636 x1r = x1r + x3r;
637 x1i = x1i + x3i;
638 x3r = x1r - (x3r * 2);
639 x3i = x1i - (x3i * 2);
640
641 x0r = x0r + x1r;
642 x0i = x0i + x1i;
643 x1r = x0r - (x1r * 2);
644 x1i = x0i - (x1i * 2);
645 x2r = x2r - x3i;
646 x2i = x2i + x3r;
647 x3i = x2r + (x3i * 2);
648 x3r = x2i - (x3r * 2);
649
650 *data = x0r;
651 *(data + 1) = x0i;
652 data += ((SIZE_T)del << 1);
653
654 *data = x2r;
655 *(data + 1) = x2i;
656 data += ((SIZE_T)del << 1);
657
658 *data = x1r;
659 *(data + 1) = x1i;
660 data += ((SIZE_T)del << 1);
661
662 *data = x3i;
663 *(data + 1) = x3r;
664 data += ((SIZE_T)del << 1);
665 }
666 data = ptr_y + 2;
667
668 sec_loop_cnt = (nodespacing * del);
669 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
670 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
671 (sec_loop_cnt / 256);
672
673 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
674 W1 = *(twiddles + j);
675 W4 = *(twiddles + j + 257);
676 W2 = *(twiddles + ((SIZE_T)j << 1));
677 W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
678 W3 = *(twiddles + j + ((SIZE_T)j << 1));
679 W6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
680
681 for (k = in_loop_cnt; k != 0; k--) {
682 FLOAT32 tmp;
683 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
684
685 data += ((SIZE_T)del << 1);
686
687 x1r = *data;
688 x1i = *(data + 1);
689 data += ((SIZE_T)del << 1);
690
691 x2r = *data;
692 x2i = *(data + 1);
693 data += ((SIZE_T)del << 1);
694
695 x3r = *data;
696 x3i = *(data + 1);
697 data -= 3 * ((SIZE_T)del << 1);
698
699 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
700 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
701 x1r = tmp;
702
703 tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
704 x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
705 x2r = tmp;
706
707 tmp = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
708 x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
709 x3r = tmp;
710
711 x0r = (*data);
712 x0i = (*(data + 1));
713
714 x0r = x0r + (x2r);
715 x0i = x0i + (x2i);
716 x2r = x0r - (x2r * 2);
717 x2i = x0i - (x2i * 2);
718 x1r = x1r + x3r;
719 x1i = x1i + x3i;
720 x3r = x1r - (x3r * 2);
721 x3i = x1i - (x3i * 2);
722
723 x0r = x0r + (x1r);
724 x0i = x0i + (x1i);
725 x1r = x0r - (x1r * 2);
726 x1i = x0i - (x1i * 2);
727 x2r = x2r - (x3i);
728 x2i = x2i + (x3r);
729 x3i = x2r + (x3i * 2);
730 x3r = x2i - (x3r * 2);
731
732 *data = x0r;
733 *(data + 1) = x0i;
734 data += ((SIZE_T)del << 1);
735
736 *data = x2r;
737 *(data + 1) = x2i;
738 data += ((SIZE_T)del << 1);
739
740 *data = x1r;
741 *(data + 1) = x1i;
742 data += ((SIZE_T)del << 1);
743
744 *data = x3i;
745 *(data + 1) = x3r;
746 data += ((SIZE_T)del << 1);
747 }
748 data -= 2 * npoints;
749 data += 2;
750 }
751 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
752 W1 = *(twiddles + j);
753 W4 = *(twiddles + j + 257);
754 W2 = *(twiddles + ((SIZE_T)j << 1));
755 W5 = *(twiddles + ((SIZE_T)j << 1) + 257);
756 W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
757 W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
758
759 for (k = in_loop_cnt; k != 0; k--) {
760 FLOAT32 tmp;
761 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
762
763 data += ((SIZE_T)del << 1);
764
765 x1r = *data;
766 x1i = *(data + 1);
767 data += ((SIZE_T)del << 1);
768
769 x2r = *data;
770 x2i = *(data + 1);
771 data += ((SIZE_T)del << 1);
772
773 x3r = *data;
774 x3i = *(data + 1);
775 data -= 3 * ((SIZE_T)del << 1);
776
777 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
778 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
779 x1r = tmp;
780
781 tmp = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
782 x2i = (FLOAT32)(-((FLOAT32)x2r * W5) + (FLOAT32)x2i * W2);
783 x2r = tmp;
784
785 tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
786 x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
787 x3r = tmp;
788
789 x0r = (*data);
790 x0i = (*(data + 1));
791
792 x0r = x0r + (x2r);
793 x0i = x0i + (x2i);
794 x2r = x0r - (x2r * 2);
795 x2i = x0i - (x2i * 2);
796 x1r = x1r + x3r;
797 x1i = x1i + x3i;
798 x3r = x1r - (x3r * 2);
799 x3i = x1i - (x3i * 2);
800
801 x0r = x0r + (x1r);
802 x0i = x0i + (x1i);
803 x1r = x0r - (x1r * 2);
804 x1i = x0i - (x1i * 2);
805 x2r = x2r - (x3i);
806 x2i = x2i + (x3r);
807 x3i = x2r + (x3i * 2);
808 x3r = x2i - (x3r * 2);
809
810 *data = x0r;
811 *(data + 1) = x0i;
812 data += ((SIZE_T)del << 1);
813
814 *data = x2r;
815 *(data + 1) = x2i;
816 data += ((SIZE_T)del << 1);
817
818 *data = x1r;
819 *(data + 1) = x1i;
820 data += ((SIZE_T)del << 1);
821
822 *data = x3i;
823 *(data + 1) = x3r;
824 data += ((SIZE_T)del << 1);
825 }
826 data -= 2 * npoints;
827 data += 2;
828 }
829 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
830 W1 = *(twiddles + j);
831 W4 = *(twiddles + j + 257);
832 W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
833 W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
834 W3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
835 W6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
836
837 for (k = in_loop_cnt; k != 0; k--) {
838 FLOAT32 tmp;
839 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
840
841 data += ((SIZE_T)del << 1);
842
843 x1r = *data;
844 x1i = *(data + 1);
845 data += ((SIZE_T)del << 1);
846
847 x2r = *data;
848 x2i = *(data + 1);
849 data += ((SIZE_T)del << 1);
850
851 x3r = *data;
852 x3i = *(data + 1);
853 data -= 3 * ((SIZE_T)del << 1);
854
855 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
856 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
857 x1r = tmp;
858
859 tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
860 x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
861 x2r = tmp;
862
863 tmp = (FLOAT32)(((FLOAT32)x3r * W6) - ((FLOAT32)x3i * W3));
864 x3i = (FLOAT32)(((FLOAT32)x3r * W3) + ((FLOAT32)x3i * W6));
865 x3r = tmp;
866
867 x0r = (*data);
868 x0i = (*(data + 1));
869
870 x0r = x0r + (x2r);
871 x0i = x0i + (x2i);
872 x2r = x0r - (x2r * 2);
873 x2i = x0i - (x2i * 2);
874 x1r = x1r + x3r;
875 x1i = x1i + x3i;
876 x3r = x1r - (x3r * 2);
877 x3i = x1i - (x3i * 2);
878
879 x0r = x0r + (x1r);
880 x0i = x0i + (x1i);
881 x1r = x0r - (x1r * 2);
882 x1i = x0i - (x1i * 2);
883 x2r = x2r - (x3i);
884 x2i = x2i + (x3r);
885 x3i = x2r + (x3i * 2);
886 x3r = x2i - (x3r * 2);
887
888 *data = x0r;
889 *(data + 1) = x0i;
890 data += ((SIZE_T)del << 1);
891
892 *data = x2r;
893 *(data + 1) = x2i;
894 data += ((SIZE_T)del << 1);
895
896 *data = x1r;
897 *(data + 1) = x1i;
898 data += ((SIZE_T)del << 1);
899
900 *data = x3i;
901 *(data + 1) = x3r;
902 data += ((SIZE_T)del << 1);
903 }
904 data -= 2 * npoints;
905 data += 2;
906 }
907 for (; j < nodespacing * del; j += nodespacing) {
908 W1 = *(twiddles + j);
909 W4 = *(twiddles + j + 257);
910 W2 = *(twiddles + ((SIZE_T)j << 1) - 256);
911 W5 = *(twiddles + ((SIZE_T)j << 1) + 1);
912 W3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
913 W6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
914
915 for (k = in_loop_cnt; k != 0; k--) {
916 FLOAT32 tmp;
917 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
918
919 data += ((SIZE_T)del << 1);
920
921 x1r = *data;
922 x1i = *(data + 1);
923 data += ((SIZE_T)del << 1);
924
925 x2r = *data;
926 x2i = *(data + 1);
927 data += ((SIZE_T)del << 1);
928
929 x3r = *data;
930 x3i = *(data + 1);
931 data -= 3 * ((SIZE_T)del << 1);
932
933 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
934 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
935 x1r = tmp;
936
937 tmp = (FLOAT32)(((FLOAT32)x2r * W5) - ((FLOAT32)x2i * W2));
938 x2i = (FLOAT32)(((FLOAT32)x2r * W2) + ((FLOAT32)x2i * W5));
939 x2r = tmp;
940
941 tmp = (FLOAT32)(-((FLOAT32)x3r * W3) - ((FLOAT32)x3i * W6));
942 x3i = (FLOAT32)(-((FLOAT32)x3r * W6) + (FLOAT32)x3i * W3);
943 x3r = tmp;
944
945 x0r = (*data);
946 x0i = (*(data + 1));
947
948 x0r = x0r + (x2r);
949 x0i = x0i + (x2i);
950 x2r = x0r - (x2r * 2);
951 x2i = x0i - (x2i * 2);
952 x1r = x1r + x3r;
953 x1i = x1i - x3i;
954 x3r = x1r - (x3r * 2);
955 x3i = x1i + (x3i * 2);
956
957 x0r = x0r + (x1r);
958 x0i = x0i + (x1i);
959 x1r = x0r - (x1r * 2);
960 x1i = x0i - (x1i * 2);
961 x2r = x2r - (x3i);
962 x2i = x2i + (x3r);
963 x3i = x2r + (x3i * 2);
964 x3r = x2i - (x3r * 2);
965
966 *data = x0r;
967 *(data + 1) = x0i;
968 data += ((SIZE_T)del << 1);
969
970 *data = x2r;
971 *(data + 1) = x2i;
972 data += ((SIZE_T)del << 1);
973
974 *data = x1r;
975 *(data + 1) = x1i;
976 data += ((SIZE_T)del << 1);
977
978 *data = x3i;
979 *(data + 1) = x3r;
980 data += ((SIZE_T)del << 1);
981 }
982 data -= 2 * npoints;
983 data += 2;
984 }
985 nodespacing >>= 2;
986 del <<= 2;
987 in_loop_cnt >>= 2;
988 }
989
990 if (not_power_4) {
991 const FLOAT32 *twiddles = ptr_w;
992 nodespacing <<= 1;
993
994 for (j = del / 2; j != 0; j--) {
995 FLOAT32 W1 = *twiddles;
996 FLOAT32 W4 = *(twiddles + 257);
997 FLOAT32 tmp;
998 twiddles += nodespacing;
999
1000 x0r = *ptr_y;
1001 x0i = *(ptr_y + 1);
1002 ptr_y += ((SIZE_T)del << 1);
1003
1004 x1r = *ptr_y;
1005 x1i = *(ptr_y + 1);
1006
1007 tmp = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
1008 x1i = (FLOAT32)(-((FLOAT32)x1r * W4) + (FLOAT32)x1i * W1);
1009 x1r = tmp;
1010
1011 *ptr_y = (x0r) - (x1r);
1012 *(ptr_y + 1) = (x0i) - (x1i);
1013 ptr_y -= ((SIZE_T)del << 1);
1014
1015 *ptr_y = (x0r) + (x1r);
1016 *(ptr_y + 1) = (x0i) + (x1i);
1017 ptr_y += 2;
1018 }
1019 twiddles = ptr_w;
1020 for (j = del / 2; j != 0; j--) {
1021 FLOAT32 W1 = *twiddles;
1022 FLOAT32 W4 = *(twiddles + 257);
1023 FLOAT32 tmp;
1024 twiddles += nodespacing;
1025
1026 x0r = *ptr_y;
1027 x0i = *(ptr_y + 1);
1028 ptr_y += ((SIZE_T)del << 1);
1029
1030 x1r = *ptr_y;
1031 x1i = *(ptr_y + 1);
1032
1033 tmp = (FLOAT32)(((FLOAT32)x1r * W4) - ((FLOAT32)x1i * W1));
1034 x1i = (FLOAT32)(((FLOAT32)x1r * W1) + ((FLOAT32)x1i * W4));
1035 x1r = tmp;
1036
1037 *ptr_y = (x0r) - (x1r);
1038 *(ptr_y + 1) = (x0i) - (x1i);
1039 ptr_y -= ((SIZE_T)del << 1);
1040
1041 *ptr_y = (x0r) + (x1r);
1042 *(ptr_y + 1) = (x0i) + (x1i);
1043 ptr_y += 2;
1044 }
1045 }
1046 }
1047
ixheaac_aac_ld_dec_fft_3_float(FLOAT32 * inp,FLOAT32 * op)1048 static PLATFORM_INLINE void ixheaac_aac_ld_dec_fft_3_float(FLOAT32 *inp, FLOAT32 *op) {
1049 FLOAT32 add_r, sub_r;
1050 FLOAT32 add_i, sub_i;
1051 FLOAT32 temp_real, temp_imag, temp;
1052
1053 FLOAT32 p1, p2, p3, p4;
1054
1055 FLOAT32 sinmu;
1056 sinmu = -0.866025403784439f;
1057
1058 temp_real = inp[0] + inp[2];
1059 temp_imag = inp[1] + inp[3];
1060
1061 add_r = inp[2] + inp[4];
1062 add_i = inp[3] + inp[5];
1063
1064 sub_r = inp[2] - inp[4];
1065 sub_i = inp[3] - inp[5];
1066
1067 p1 = add_r / 2.0f;
1068 p4 = add_i / 2.0f;
1069 p2 = sub_i * sinmu;
1070 p3 = sub_r * sinmu;
1071
1072 temp = inp[0] - p1;
1073
1074 op[0] = temp_real + inp[4];
1075 op[1] = temp_imag + inp[5];
1076 op[2] = temp + p2;
1077 op[3] = (inp[1] - p3) - p4;
1078 op[4] = temp - p2;
1079 op[5] = (inp[1] + p3) - p4;
1080
1081 return;
1082 }
1083
/*
 * Real-input FFT built from three 8-point transforms followed by 3-point
 * butterflies.
 *
 *   x_in    - real input samples; sample 3 * n + b feeds sub-transform b.
 *   x_out   - receives 48 floats: three consecutive runs of 8 interleaved
 *             re/im pairs (offsets 0, 16 and 32).
 *   npoints - transform length. NOTE(review): the scratch buffers, the fixed
 *             8-point sub-transform and the 24-entry twiddle table only line
 *             up for npoints == 24 - confirm callers never pass anything else.
 */
void ixheaac_real_synth_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) {
  FLOAT32 sub_in[8];
  FLOAT32 sub_out[16];
  FLOAT32 bfly_out[48];
  FLOAT32 bfly_in[48];
  const WORD32 third = npoints / 3;
  WORD32 branch, n, leg;

  /* Decimate by three, transform each branch, and interleave the three
   * spectra so that each group of six floats holds one bin from every
   * branch. */
  for (branch = 0; branch < 3; branch++) {
    for (n = 0; n < third; n++) {
      sub_in[n] = x_in[3 * n + branch];
    }

    ixheaac_real_synth_fft_p2(sub_in, sub_out, 8);

    for (n = 0; n < 8; n++) {
      bfly_in[6 * n + 2 * branch] = sub_out[2 * n];
      bfly_in[6 * n + 2 * branch + 1] = sub_out[2 * n + 1];
    }
  }

  /* Twiddle the second and third value of every group; the first value of
   * each group is left untouched. */
  for (n = 0; n < third; n++) {
    for (leg = 0; leg < 2; leg++) {
      FLOAT32 *val = bfly_in + 6 * n + 2 + 2 * leg;
      const FLOAT32 *tw = ixheaac_twidle_tbl_24 + 4 * n + 2 * leg;
      const FLOAT32 re = val[0];
      const FLOAT32 im = val[1];

      val[0] = (re * tw[0] + im * tw[1]);
      val[1] = (-re * tw[1] + im * tw[0]);
    }
  }

  for (n = 0; n < third; n++) {
    ixheaac_aac_ld_dec_fft_3_float(bfly_in + 6 * n, bfly_out + 6 * n);
  }

  /* De-interleave: butterfly output m of group n goes to x_out[16 * m + 2 * n]. */
  for (n = 0; n < 8; n++) {
    const FLOAT32 *group = bfly_out + 6 * n;

    x_out[2 * n] = group[0];
    x_out[2 * n + 1] = group[1];
    x_out[16 + 2 * n] = group[2];
    x_out[16 + 2 * n + 1] = group[3];
    x_out[32 + 2 * n] = group[4];
    x_out[32 + 2 * n + 1] = group[5];
  }
}
1147
/*
 * Complex FFT built from three 16-point transforms followed by 3-point
 * butterflies.
 *
 *   x_in    - complex input, interleaved re/im. It is also used as working
 *             storage: the sub-transform results and the twiddled values are
 *             written back into it, so its contents are destroyed.
 *   x_out   - receives 96 floats: three consecutive runs of 16 interleaved
 *             re/im pairs (offsets 0, 32 and 64).
 *   npoints - transform length. NOTE(review): the scratch buffers, the fixed
 *             16-point sub-transform and the 48-entry twiddle table only line
 *             up for npoints == 48 - confirm callers never pass anything else.
 */
void ixheaac_cmplx_anal_fft_p3(FLOAT32 *x_in, FLOAT32 *x_out, WORD32 npoints) {
  FLOAT32 sub_in[32];
  FLOAT32 sub_out[32];
  FLOAT32 bfly_out[96];
  const WORD32 third = npoints / 3;
  WORD32 branch, n, leg;

  /* Decimate by three, transform each branch, and store each spectrum back
   * into the slots its inputs came from. */
  for (branch = 0; branch < 3; branch++) {
    FLOAT32 *lane = x_in + 2 * branch;

    for (n = 0; n < 16; n++) {
      sub_in[2 * n] = lane[6 * n];
      sub_in[2 * n + 1] = lane[6 * n + 1];
    }

    ixheaac_cmplx_anal_fft_p2(sub_in, sub_out, 16);

    for (n = 0; n < 16; n++) {
      lane[6 * n] = sub_out[2 * n];
      lane[6 * n + 1] = sub_out[2 * n + 1];
    }
  }

  /* Twiddle the second and third value of every group of three; the first
   * value of each group is left untouched. */
  for (n = 0; n < third; n++) {
    for (leg = 0; leg < 2; leg++) {
      FLOAT32 *val = x_in + 6 * n + 2 + 2 * leg;
      const FLOAT32 *tw = ixheaac_twidle_tbl_48 + 4 * n + 2 * leg;
      const FLOAT32 re = val[0];
      const FLOAT32 im = val[1];

      val[0] = (re * tw[0] + im * tw[1]);
      val[1] = (-re * tw[1] + im * tw[0]);
    }
  }

  for (n = 0; n < third; n++) {
    ixheaac_aac_ld_dec_fft_3_float(x_in + 6 * n, bfly_out + 6 * n);
  }

  /* De-interleave: butterfly output m of group n goes to x_out[32 * m + 2 * n]. */
  for (n = 0; n < 16; n++) {
    const FLOAT32 *group = bfly_out + 6 * n;

    x_out[2 * n] = group[0];
    x_out[2 * n + 1] = group[1];
    x_out[32 + 2 * n] = group[2];
    x_out[32 + 2 * n + 1] = group[3];
    x_out[64 + 2 * n] = group[4];
    x_out[64 + 2 * n + 1] = group[5];
  }
}
1210