1 /******************************************************************************
2 * *
3 * Copyright (C) 2023 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 #include <stdlib.h>
22 #include <ixheaac_type_def.h>
23 #include "ixheaac_constants.h"
24 #include "ixheaace_constants.h"
25 #include "iusace_basic_ops_flt.h"
26 #include "ixheaace_common_utils.h"
27 #include "ixheaac_fft_ifft_rom.h"
28 #include "ixheaac_basic_ops32.h"
29 #include "ixheaac_basic_ops40.h"
30 #include "ixheaac_basic_ops.h"
31
32 #define DIG_REV(i, m, j) \
33 do { \
34 unsigned _ = (i); \
35 _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \
36 _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \
37 _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \
38 (j) = _ >> (m); \
39 } while (0)
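/* DIG_REV reverses the order of the base-4 digits (2-bit groups) of i within
 * its 16-bit halfword and then shifts the result right by m; it produces the
 * digit-reversed load order for the radix-4 stages below. Worked example of
 * the bit manipulation before the final shift: i = 0x0001 -> 0x0004 (pair
 * swap) -> 0x0040 (nibble swap) -> 0x4000 (byte swap). */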
40
41 #define CPLX_MPY_FFT(re, im, a, b, c, d) \
42 do { \
43 re = ((a * c) - (b * d)); \
44 im = ((a * d) + (b * c)); \
45 } while (0)
46
47 #define CPLX_MPY_IFFT(re, im, a, b, c, d) \
48 do { \
49 re = ((a * c) + (b * d)); \
50 im = (-(a * d) + (b * c)); \
51 } while (0)
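/* Algebraically, CPLX_MPY_FFT forms (a + j*b) * (c + j*d) and CPLX_MPY_IFFT
 * forms (a + j*b) * (c - j*d), i.e. multiplication by a twiddle factor and by
 * its complex conjugate respectively. */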
52
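/* Winograd 7-point transform over 7 interleaved complex values (14 FLOAT32s).
 * The "Node 1..5" comments below list the sparse factor matrices of the
 * Winograd factorization; each code block applies the corresponding
 * matrix-vector product separately to the real and imaginary parts. */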
53 VOID ixheaace_hbe_apply_ifft_7(FLOAT32 *ptr_inp, FLOAT32 *ptr_op) {
54 FLOAT32 x0r, x1r, x2r, x3r, x4r, x5r, x6r, x7r, x8r;
55 FLOAT32 x0i, x1i, x2i, x3i, x4i, x5i, x6i, x7i, x8i;
56 FLOAT32 y0r, y1r, y2r, y3r, y4r, y5r, y6r, y7r, y8r;
57 FLOAT32 y0i, y1i, y2i, y3i, y4i, y5i, y6i, y7i, y8i;
58
59 /*
60 * Node 1 of Winograd FFT for 7 point
61 *
62 * 1 0 0 0 0 0 0
63 * 0 1 0 0 0 0 1
64 * 0 1 0 0 0 0 -1
65 * 0 0 1 0 0 1 0
66 * 0 0 1 0 0 -1 0
67 * 0 0 0 1 1 0 0
68 * 0 0 0 -1 1 0 0
69 *
70 */
71
72 x0r = ptr_inp[0];
73 x0i = ptr_inp[1];
74 x1r = ptr_inp[2] + ptr_inp[12];
75 x1i = ptr_inp[3] + ptr_inp[13];
76 x2r = ptr_inp[2] - ptr_inp[12];
77 x2i = ptr_inp[3] - ptr_inp[13];
78 x3r = ptr_inp[4] + ptr_inp[10];
79 x3i = ptr_inp[5] + ptr_inp[11];
80 x4r = ptr_inp[4] - ptr_inp[10];
81 x4i = ptr_inp[5] - ptr_inp[11];
82 x5r = ptr_inp[8] + ptr_inp[6];
83 x5i = ptr_inp[9] + ptr_inp[7];
84 x6r = ptr_inp[8] - ptr_inp[6];
85 x6i = ptr_inp[9] - ptr_inp[7];
86
87 /*
88 * Node 2 of Winograd FFT for 7 point
89 *
90 * 1 0 0 0 0 0 0
91 * 0 1 0 1 0 1 0
92 * 0 1 0 -1 0 0 0
93 * 0 -1 0 0 0 1 0
94 * 0 0 0 1 0 -1 0
95 * 0 0 1 0 1 0 1
96 * 0 0 1 0 -1 0 0
97 * 0 0 -1 0 0 0 1
98 * 0 0 0 0 1 0 -1
99 *
100 */
101
102 y0r = x0r;
103 y0i = x0i;
104 y1r = x1r + x3r + x5r;
105 y1i = x1i + x3i + x5i;
106 y2r = x1r - x3r;
107 y2i = x1i - x3i;
108 y3r = x5r - x1r;
109 y3i = x5i - x1i;
110 y4r = x3r - x5r;
111 y4i = x3i - x5i;
112 y5r = x2r + x4r + x6r;
113 y5i = x2i + x4i + x6i;
114 y6r = x2r - x4r;
115 y6i = x2i - x4i;
116 y7r = x6r - x2r;
117 y7i = x6i - x2i;
118 y8r = x4r - x6r;
119 y8i = x4i - x6i;
120
121 /*
122 * Node 3 of Winograd FFT for 7 point
123 *
124 * 1 1 0 0 0 0 0 0 0
125 * 1 c70 0 0 0 0 0 0 0
126 * 0 0 c71 0 0 0 0 0 0
127 * 0 0 0 c72 0 0 0 0 0
128 * 0 0 0 0 c73 0 0 0 0
129 * 0 0 0 0 0 jc74 0 0 0
130 * 0 0 0 0 0 0 jc75 0 0
131 * 0 0 0 0 0 0 0 jc76 0
132 * 0 0 0 0 0 0 0 0 jc77
133 *
134 */
135 x0r = y0r + y1r;
136 x0i = y0i + y1i;
137 x1r = y0r + C70 * y1r;
138 x1i = y0i + C70 * y1i;
139 x2r = C71 * y2r;
140 x2i = C71 * y2i;
141 x3r = C72 * y3r;
142 x3i = C72 * y3i;
143 x4r = C73 * y4r;
144 x4i = C73 * y4i;
145 x5r = C74 * y5i;
146 x5i = -C74 * y5r;
147 x6r = C75 * y6i;
148 x6i = -C75 * y6r;
149 x7r = C76 * y7i;
150 x7i = -C76 * y7r;
151 x8r = C77 * y8i;
152 x8i = -C77 * y8r;
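  /* The jc74..jc77 entries of the node-3 matrix are purely imaginary
   * coefficients: multiplying (yr + j*yi) by -j*C gives C*yi - j*C*yr, which
   * is the swap-and-negate pattern used for x5..x8 above (sign conventions
   * are folded into the C7x constants). */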
153
154 /*
155 * Node 4 of Winograd FFT for 7 point
156 *
157 * 1 0 0 0 0 0 0 0 0
158 * 0 1 1 0 1 0 0 0 0
159 * 0 1 -1 -1 0 0 0 0 0
160 * 0 1 0 1 -1 0 0 0 0
161 * 0 0 0 0 0 1 1 0 1
162 * 0 0 0 0 0 1 -1 -1 0
163 * 0 0 0 0 0 1 0 1 -1
164 *
165 */
166
167 y0r = x0r;
168 y0i = x0i;
169 y1r = x1r + x2r + x4r;
170 y1i = x1i + x2i + x4i;
171 y2r = x1r - x2r - x3r;
172 y2i = x1i - x2i - x3i;
173 y3r = x1r + x3r - x4r;
174 y3i = x1i + x3i - x4i;
175 y4r = x5r + x6r + x8r;
176 y4i = x5i + x6i + x8i;
177 y5r = x5r - x6r - x7r;
178 y5i = x5i - x6i - x7i;
179 y6r = x5r + x7r - x8r;
180 y6i = x5i + x7i - x8i;
181
182 /*
183 * Node 5 of Winograd FFT for 7 point
184 *
185 * 1 0 0 0 0 0 0
186 * 0 1 0 0 1 0 0
187 * 0 0 0 1 0 0 1
188 * 0 0 1 0 0 -1 0
189 * 0 0 1 0 0 1 0
190 * 0 0 0 1 0 0 -1
191 * 0 1 0 0 -1 0 0
192 *
193 */
194 x0r = y0r;
195 x0i = y0i;
196 x1r = y1r + y4r;
197 x1i = y1i + y4i;
198 x2r = y3r + y6r;
199 x2i = y3i + y6i;
200 x3r = y2r - y5r;
201 x3i = y2i - y5i;
202 x4r = y2r + y5r;
203 x4i = y2i + y5i;
204 x5r = y3r - y6r;
205 x5i = y3i - y6i;
206 x6r = y1r - y4r;
207 x6i = y1i - y4i;
208
209 ptr_op[0] = x0r;
210 ptr_op[1] = x0i;
211 ptr_op[2] = x1r;
212 ptr_op[3] = x1i;
213 ptr_op[4] = x2r;
214 ptr_op[5] = x2i;
215 ptr_op[6] = x3r;
216 ptr_op[7] = x3i;
217 ptr_op[8] = x4r;
218 ptr_op[9] = x4i;
219 ptr_op[10] = x5r;
220 ptr_op[11] = x5i;
221 ptr_op[12] = x6r;
222 ptr_op[13] = x6i;
223 }
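#if 0
/* Illustrative sketch only, kept out of the build: a naive, unscaled 7-point
 * inverse DFT over interleaved re/im data, assuming <math.h> for cos()/sin()
 * and the e^(+j*2*pi*n*k/7) sign convention. The Winograd network above
 * evaluates the same length-7 transform with only a few real multiplications
 * (the C70..C77 constants). The name hbe_ref_idft_7 is hypothetical. */
static VOID hbe_ref_idft_7(const FLOAT32 *ptr_inp, FLOAT32 *ptr_op) {
  WORD32 k, n;
  for (k = 0; k < 7; k++) {
    FLOAT64 acc_re = 0.0, acc_im = 0.0;
    for (n = 0; n < 7; n++) {
      FLOAT64 ang = 2.0 * 3.141592653589793 * (FLOAT64)(n * k) / 7.0;
      FLOAT64 c = cos(ang), s = sin(ang);
      /* (re + j*im) * (cos(ang) + j*sin(ang)) */
      acc_re += ptr_inp[2 * n + 0] * c - ptr_inp[2 * n + 1] * s;
      acc_im += ptr_inp[2 * n + 0] * s + ptr_inp[2 * n + 1] * c;
    }
    ptr_op[2 * k + 0] = (FLOAT32)acc_re;
    ptr_op[2 * k + 1] = (FLOAT32)acc_im;
  }
}
#endif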
224
225 VOID ixheaace_hbe_apply_fft_3(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 i_sign) {
226 FLOAT32 add_r, sub_r;
227 FLOAT32 add_i, sub_i;
228 FLOAT32 x_01_r, x_01_i, temp;
229
230 FLOAT32 p1, p2, p3, p4;
231
232   /* mu = PI / 3; cos(mu) is 0.5, so the code halves (divides by 2.0) instead
233      of multiplying; the Q31 ">> 1" wording stems from the fixed-point variant */
234
235 FLOAT64 sinmu;
236 sinmu = -0.866025403784439 * (FLOAT64)i_sign;
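  /* With w = cos(2*PI/3) + j * i_sign * sin(2*PI/3) = -0.5 + j*i_sign*0.866...,
   * the outputs below are X0 = x0 + x1 + x2, X1 = x0 + w*x1 + conj(w)*x2 and
   * X2 = x0 + conj(w)*x1 + w*x2: i_sign = -1 gives the forward 3-point DFT,
   * i_sign = +1 the unscaled inverse. */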
237
238 x_01_r = ptr_inp[0] + ptr_inp[2];
239 x_01_i = ptr_inp[1] + ptr_inp[3];
240
241 add_r = ptr_inp[2] + ptr_inp[4];
242 add_i = ptr_inp[3] + ptr_inp[5];
243
244 sub_r = ptr_inp[2] - ptr_inp[4];
245 sub_i = ptr_inp[3] - ptr_inp[5];
246
247 p1 = add_r / (FLOAT32)2.0;
248 p4 = add_i / (FLOAT32)2.0;
249 p2 = (FLOAT32)((FLOAT64)sub_i * sinmu);
250 p3 = (FLOAT32)((FLOAT64)sub_r * sinmu);
251
252 temp = ptr_inp[0] - p1;
253
254 ptr_op[0] = x_01_r + ptr_inp[4];
255 ptr_op[1] = x_01_i + ptr_inp[5];
256 ptr_op[2] = temp + p2;
257 ptr_op[3] = (ptr_inp[1] - p3) - p4;
258 ptr_op[4] = temp - p2;
259 ptr_op[5] = (ptr_inp[1] + p3) - p4;
260 }
261
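/* Twiddle multiplication stage for the mixed-radix transforms below. The data
 * is treated as a dim1 x dim2 matrix of complex values: the first row and the
 * first column pass through unchanged (twiddle factor 1), and every other
 * element is multiplied by the corresponding ptr_tw entry. This variant uses
 * the conjugated product (CPLX_MPY_IFFT). */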
262 VOID ixheaace_hbe_apply_tw_mult_ifft(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1, WORD32 dim2,
263 const FLOAT32 *ptr_tw) {
264 FLOAT32 accu1, accu2;
265 WORD32 i, j;
266 WORD32 step_val = (dim2 - 1) << 1;
267 for (i = 0; i < (dim2); i++) {
268 ptr_op[0] = ptr_inp[0];
269 ptr_op[1] = ptr_inp[1];
270 ptr_op += 2;
271 ptr_inp += 2;
272 }
273
274 for (j = 0; j < (dim1 - 1); j++) {
275 ptr_op[0] = ptr_inp[0];
276 ptr_op[1] = ptr_inp[1];
277 ptr_inp += 2;
278 ptr_op += 2;
279 for (i = 0; i < (dim2 - 1); i++) {
280 CPLX_MPY_IFFT(accu1, accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1], ptr_tw[2 * i + 1],
281 ptr_tw[2 * i]);
282 ptr_op[2 * i + 0] = accu1;
283 ptr_op[2 * i + 1] = accu2;
284 }
285 ptr_inp += step_val;
286 ptr_op += step_val;
287 ptr_tw += (dim2 - 1) * 2;
288 }
289 }
290
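/* Same structure as ixheaace_hbe_apply_tw_mult_ifft above, but using the
 * non-conjugated product (CPLX_MPY_FFT) for the forward FFT. */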
291 VOID ixheaace_hbe_apply_tw_mult_fft(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1, WORD32 dim2,
292 const FLOAT32 *ptr_tw) {
293 FLOAT32 accu1, accu2;
294 WORD32 i, j;
295 WORD32 step_val = (dim2 - 1) << 1;
296 for (i = 0; i < (dim2); i++) {
297 ptr_op[0] = ptr_inp[0];
298 ptr_op[1] = ptr_inp[1];
299 ptr_op += 2;
300 ptr_inp += 2;
301 }
302
303 for (j = 0; j < (dim1 - 1); j++) {
304 ptr_op[0] = ptr_inp[0];
305 ptr_op[1] = ptr_inp[1];
306 ptr_inp += 2;
307 ptr_op += 2;
308 for (i = 0; i < (dim2 - 1); i++) {
309 CPLX_MPY_FFT(accu1, accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1], ptr_tw[2 * i + 1],
310 ptr_tw[2 * i]);
311 ptr_op[2 * i + 0] = accu1;
312 ptr_op[2 * i + 1] = accu2;
313 }
314 ptr_inp += step_val;
315 ptr_op += step_val;
316 ptr_tw += (dim2 - 1) * 2;
317 }
318 }
319
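/* Radix-4 FFT/IFFT for power-of-two lengths on interleaved re/im data
 * (i_sign == -1: forward FFT, otherwise inverse, both without the 1/N scale).
 * The input is loaded in digit-reversed order into the scratch buffer,
 * log4(n_pass) butterfly stages are run in place there (plus a final radix-2
 * stage when log2(n_pass) is odd, see not_power_4), and the result is copied
 * back into re[]. ptr_scratch must hold at least 4 * n_pass FLOAT32 values,
 * since the output is staged at ptr_scratch + 2 * n_pass. */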
320 VOID ixheaace_hbe_apply_cfftn(FLOAT32 re[], FLOAT32 *ptr_scratch, WORD32 n_pass, WORD32 i_sign) {
321 WORD32 i, j, k, n_stages, h2;
322 FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
323 WORD32 del, nodespacing, in_loop_cnt;
324 WORD32 not_power_4;
325 WORD32 dig_rev_shift;
326 WORD32 mpass = n_pass;
327 WORD32 npoints = n_pass;
328 const FLOAT64 *ptr_w;
329 FLOAT32 *ptr_x = ptr_scratch;
330 FLOAT32 *y = ptr_scratch + (2 * n_pass);
331 FLOAT32 *ptr_y = y;
332
333 dig_rev_shift = ixheaac_norm32(mpass) + 1 - 16;
334 n_stages = 30 - ixheaac_norm32(mpass); /* log2(npoints), if npoints=2^m */
335 not_power_4 = n_stages & 1;
336
337 n_stages = n_stages >> 1;
338
339 ptr_w = ixheaac_twid_tbl_fft_double;
340 ptr_x = re;
341
342 dig_rev_shift = MAX(dig_rev_shift, 0);
343
344 if (i_sign == -1) {
345 for (i = 0; i < npoints; i += 4) {
346 FLOAT32 *ptr_inp = ptr_x;
347 FLOAT32 tmk;
348
349 DIG_REV(i, dig_rev_shift, h2);
350 if (not_power_4) {
351 h2 += 1;
352 h2 &= ~1;
353 }
354 ptr_inp += (h2);
355
356 x0r = *ptr_inp;
357 x0i = *(ptr_inp + 1);
358 ptr_inp += (npoints >> 1);
359
360 x1r = *ptr_inp;
361 x1i = *(ptr_inp + 1);
362 ptr_inp += (npoints >> 1);
363
364 x2r = *ptr_inp;
365 x2i = *(ptr_inp + 1);
366 ptr_inp += (npoints >> 1);
367
368 x3r = *ptr_inp;
369 x3i = *(ptr_inp + 1);
370
371 x0r = x0r + x2r;
372 x0i = x0i + x2i;
373
374 tmk = x0r - x2r;
375 x2r = tmk - x2r;
376 tmk = x0i - x2i;
377 x2i = tmk - x2i;
378
379 x1r = x1r + x3r;
380 x1i = x1i + x3i;
381
382 tmk = x1r - x3r;
383 x3r = tmk - x3r;
384 tmk = x1i - x3i;
385 x3i = tmk - x3i;
386
387 x0r = x0r + x1r;
388 x0i = x0i + x1i;
389
390 tmk = x0r - x1r;
391 x1r = tmk - x1r;
392 tmk = x0i - x1i;
393 x1i = tmk - x1i;
394
395 x2r = x2r + x3i;
396 x2i = x2i - x3r;
397
398 tmk = x2r - x3i;
399 x3i = tmk - x3i;
400 tmk = x2i + x3r;
401 x3r = tmk + x3r;
402
403 *ptr_y++ = x0r;
404 *ptr_y++ = x0i;
405 *ptr_y++ = x2r;
406 *ptr_y++ = x2i;
407 *ptr_y++ = x1r;
408 *ptr_y++ = x1i;
409 *ptr_y++ = x3i;
410 *ptr_y++ = x3r;
411 }
412 ptr_y -= 2 * npoints;
413 del = 4;
414 nodespacing = 64;
415 in_loop_cnt = npoints >> 4;
416 for (i = n_stages - 1; i > 0; i--) {
417 const FLOAT64 *ptr_twiddle = ptr_w;
418 FLOAT32 *data = ptr_y;
419 FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
420 WORD32 sec_loop_cnt;
421
422 for (k = in_loop_cnt; k != 0; k--) {
423 x0r = (*data);
424 x0i = (*(data + 1));
425 data += ((SIZE_T)del << 1);
426
427 x1r = (*data);
428 x1i = (*(data + 1));
429 data += ((SIZE_T)del << 1);
430
431 x2r = (*data);
432 x2i = (*(data + 1));
433 data += ((SIZE_T)del << 1);
434
435 x3r = (*data);
436 x3i = (*(data + 1));
437 data -= 3 * (del << 1);
438
439 x0r = x0r + x2r;
440 x0i = x0i + x2i;
441 x2r = x0r - (x2r * 2);
442 x2i = x0i - (x2i * 2);
443 x1r = x1r + x3r;
444 x1i = x1i + x3i;
445 x3r = x1r - (x3r * 2);
446 x3i = x1i - (x3i * 2);
447
448 x0r = x0r + x1r;
449 x0i = x0i + x1i;
450 x1r = x0r - (x1r * 2);
451 x1i = x0i - (x1i * 2);
452 x2r = x2r + x3i;
453 x2i = x2i - x3r;
454 x3i = x2r - (x3i * 2);
455 x3r = x2i + (x3r * 2);
456
457 *data = x0r;
458 *(data + 1) = x0i;
459 data += ((SIZE_T)del << 1);
460
461 *data = x2r;
462 *(data + 1) = x2i;
463 data += ((SIZE_T)del << 1);
464
465 *data = x1r;
466 *(data + 1) = x1i;
467 data += ((SIZE_T)del << 1);
468
469 *data = x3i;
470 *(data + 1) = x3r;
471 data += ((SIZE_T)del << 1);
472 }
473 data = ptr_y + 2;
474
475 sec_loop_cnt = (nodespacing * del);
476 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
477 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
478 (sec_loop_cnt / 256);
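      /* The divide chain above is an integer approximation of
       * (nodespacing * del) / 3, since 1/4 + 1/8 - 1/16 + 1/32 - ... ~= 1/3;
       * it delimits the first of the four j-ranges below, each of which uses
       * different offsets into the split cos/sin twiddle table. */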
479
480 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
481 w_1 = *(ptr_twiddle + j);
482 w_4 = *(ptr_twiddle + j + 257);
483 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1));
484 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 257);
485 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1));
486 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) + 257);
487
488 for (k = in_loop_cnt; k != 0; k--) {
489 FLOAT32 tmp;
490 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
491 /*x0 is loaded later to avoid register crunch*/
492
493 data += ((SIZE_T)del << 1);
494
495 x1r1 = *data;
496 x1i1 = *(data + 1);
497 data += ((SIZE_T)del << 1);
498
499 x2r1 = *data;
500 x2i1 = *(data + 1);
501 data += ((SIZE_T)del << 1);
502
503 x3r1 = *data;
504 x3i1 = *(data + 1);
505 data -= 3 * (del << 1);
506
507 tmp =
508 (FLOAT32)(ixheaace_dmult((FLOAT64)x1r1, w_1) - ixheaace_dmult((FLOAT64)x1i1, w_4));
509 x1i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r1, w_4), (FLOAT64)x1i1, w_1);
510 x1r1 = tmp;
511
512 tmp =
513 (FLOAT32)(ixheaace_dmult((FLOAT64)x2r1, w_2) - ixheaace_dmult((FLOAT64)x2i1, w_5));
514 x2i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r1, w_5), (FLOAT64)x2i1, w_2);
515 x2r1 = tmp;
516
517 tmp =
518 (FLOAT32)(ixheaace_dmult((FLOAT64)x3r1, w_3) - ixheaace_dmult((FLOAT64)x3i1, w_6));
519 x3i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r1, w_6), (FLOAT64)x3i1, w_3);
520 x3r1 = tmp;
521
522 x0r1 = (*data);
523 x0i1 = (*(data + 1));
524
525 x0r1 = x0r1 + (x2r1);
526 x0i1 = x0i1 + (x2i1);
527 x2r1 = x0r1 - (x2r1 * 2);
528 x2i1 = x0i1 - (x2i1 * 2);
529 x1r1 = x1r1 + x3r1;
530 x1i1 = x1i1 + x3i1;
531 x3r1 = x1r1 - (x3r1 * 2);
532 x3i1 = x1i1 - (x3i1 * 2);
533
534 x0r1 = x0r1 + (x1r1);
535 x0i1 = x0i1 + (x1i1);
536 x1r1 = x0r1 - (x1r1 * 2);
537 x1i1 = x0i1 - (x1i1 * 2);
538 x2r1 = x2r1 + (x3i1);
539 x2i1 = x2i1 - (x3r1);
540 x3i1 = x2r1 - (x3i1 * 2);
541 x3r1 = x2i1 + (x3r1 * 2);
542
543 *data = x0r1;
544 *(data + 1) = x0i1;
545 data += ((SIZE_T)del << 1);
546
547 *data = x2r1;
548 *(data + 1) = x2i1;
549 data += ((SIZE_T)del << 1);
550
551 *data = x1r1;
552 *(data + 1) = x1i1;
553 data += ((SIZE_T)del << 1);
554
555 *data = x3i1;
556 *(data + 1) = x3r1;
557 data += ((SIZE_T)del << 1);
558 }
559 data -= 2 * npoints;
560 data += 2;
561 }
562 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
563 w_1 = *(ptr_twiddle + j);
564 w_4 = *(ptr_twiddle + j + 257);
565 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1));
566 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 257);
567 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 256);
568 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) + 1);
569
570 for (k = in_loop_cnt; k != 0; k--) {
571 FLOAT32 tmp;
572 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
573 /*x0 is loaded later to avoid register crunch*/
574
575 data += ((SIZE_T)del << 1);
576
577 x1r1 = *data;
578 x1i1 = *(data + 1);
579 data += ((SIZE_T)del << 1);
580
581 x2r1 = *data;
582 x2i1 = *(data + 1);
583 data += ((SIZE_T)del << 1);
584
585 x3r1 = *data;
586 x3i1 = *(data + 1);
587 data -= 3 * (del << 1);
588
589 tmp =
590 (FLOAT32)(ixheaace_dmult((FLOAT64)x1r1, w_1) - ixheaace_dmult((FLOAT64)x1i1, w_4));
591 x1i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r1, w_4), (FLOAT64)x1i1, w_1);
592 x1r1 = tmp;
593
594 tmp =
595 (FLOAT32)(ixheaace_dmult((FLOAT64)x2r1, w_2) - ixheaace_dmult((FLOAT64)x2i1, w_5));
596 x2i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r1, w_5), (FLOAT64)x2i1, w_2);
597 x2r1 = tmp;
598
599 tmp =
600 (FLOAT32)(ixheaace_dmult((FLOAT64)x3r1, w_6) + ixheaace_dmult((FLOAT64)x3i1, w_3));
601 x3i1 =
602 (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r1, w_3) + ixheaace_dmult((FLOAT64)x3i1, w_6));
603 x3r1 = tmp;
604
605 x0r1 = (*data);
606 x0i1 = (*(data + 1));
607
608 x0r1 = x0r1 + (x2r1);
609 x0i1 = x0i1 + (x2i1);
610 x2r1 = x0r1 - (x2r1 * 2);
611 x2i1 = x0i1 - (x2i1 * 2);
612 x1r1 = x1r1 + x3r1;
613 x1i1 = x1i1 + x3i1;
614 x3r1 = x1r1 - (x3r1 * 2);
615 x3i1 = x1i1 - (x3i1 * 2);
616
617 x0r1 = x0r1 + (x1r1);
618 x0i1 = x0i1 + (x1i1);
619 x1r1 = x0r1 - (x1r1 * 2);
620 x1i1 = x0i1 - (x1i1 * 2);
621 x2r1 = x2r1 + (x3i1);
622 x2i1 = x2i1 - (x3r1);
623 x3i1 = x2r1 - (x3i1 * 2);
624 x3r1 = x2i1 + (x3r1 * 2);
625
626 *data = x0r1;
627 *(data + 1) = x0i1;
628 data += ((SIZE_T)del << 1);
629
630 *data = x2r1;
631 *(data + 1) = x2i1;
632 data += ((SIZE_T)del << 1);
633
634 *data = x1r1;
635 *(data + 1) = x1i1;
636 data += ((SIZE_T)del << 1);
637
638 *data = x3i1;
639 *(data + 1) = x3r1;
640 data += ((SIZE_T)del << 1);
641 }
642 data -= 2 * npoints;
643 data += 2;
644 }
645 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
646 w_1 = *(ptr_twiddle + j);
647 w_4 = *(ptr_twiddle + j + 257);
648 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1) - 256);
649 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 1);
650 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 256);
651 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) + 1);
652
653 for (k = in_loop_cnt; k != 0; k--) {
654 FLOAT32 tmp;
655 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
656 /*x0 is loaded later to avoid register crunch*/
657
658 data += ((SIZE_T)del << 1);
659
660 x1r1 = *data;
661 x1i1 = *(data + 1);
662 data += ((SIZE_T)del << 1);
663
664 x2r1 = *data;
665 x2i1 = *(data + 1);
666 data += ((SIZE_T)del << 1);
667
668 x3r1 = *data;
669 x3i1 = *(data + 1);
670 data -= 3 * (del << 1);
671
672 tmp =
673 (FLOAT32)(ixheaace_dmult((FLOAT64)x1r1, w_1) - ixheaace_dmult((FLOAT64)x1i1, w_4));
674 x1i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1r1, w_4), x1i1, w_1);
675 x1r1 = tmp;
676
677 tmp =
678 (FLOAT32)(ixheaace_dmult((FLOAT64)x2r1, w_5) + ixheaace_dmult((FLOAT64)x2i1, w_2));
679 x2i1 = (FLOAT32)(-ixheaace_dmult(x2r1, w_2) + ixheaace_dmult(x2i1, w_5));
680 x2r1 = tmp;
681
682 tmp =
683 (FLOAT32)(ixheaace_dmult((FLOAT64)x3r1, w_6) + ixheaace_dmult((FLOAT64)x3i1, w_3));
684 x3i1 =
685 (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r1, w_3) + ixheaace_dmult((FLOAT64)x3i1, w_6));
686 x3r1 = tmp;
687
688 x0r1 = (*data);
689 x0i1 = (*(data + 1));
690
691 x0r1 = x0r1 + (x2r1);
692 x0i1 = x0i1 + (x2i1);
693 x2r1 = x0r1 - (x2r1 * 2);
694 x2i1 = x0i1 - (x2i1 * 2);
695 x1r1 = x1r1 + x3r1;
696 x1i1 = x1i1 + x3i1;
697 x3r1 = x1r1 - (x3r1 * 2);
698 x3i1 = x1i1 - (x3i1 * 2);
699
700 x0r1 = x0r1 + (x1r1);
701 x0i1 = x0i1 + (x1i1);
702 x1r1 = x0r1 - (x1r1 * 2);
703 x1i1 = x0i1 - (x1i1 * 2);
704 x2r1 = x2r1 + (x3i1);
705 x2i1 = x2i1 - (x3r1);
706 x3i1 = x2r1 - (x3i1 * 2);
707 x3r1 = x2i1 + (x3r1 * 2);
708
709 *data = x0r1;
710 *(data + 1) = x0i1;
711 data += ((SIZE_T)del << 1);
712
713 *data = x2r1;
714 *(data + 1) = x2i1;
715 data += ((SIZE_T)del << 1);
716
717 *data = x1r1;
718 *(data + 1) = x1i1;
719 data += ((SIZE_T)del << 1);
720
721 *data = x3i1;
722 *(data + 1) = x3r1;
723 data += ((SIZE_T)del << 1);
724 }
725 data -= 2 * npoints;
726 data += 2;
727 }
728 for (; j < nodespacing * del; j += nodespacing) {
729 w_1 = *(ptr_twiddle + j);
730 w_4 = *(ptr_twiddle + j + 257);
731 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1) - 256);
732 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 1);
733 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 512);
734 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 512 + 257);
735
736 for (k = in_loop_cnt; k != 0; k--) {
737 FLOAT32 tmp;
738 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
739 /*x0 is loaded later to avoid register crunch*/
740
741 data += ((SIZE_T)del << 1);
742
743 x1r1 = *data;
744 x1i1 = *(data + 1);
745 data += ((SIZE_T)del << 1);
746
747 x2r1 = *data;
748 x2i1 = *(data + 1);
749 data += ((SIZE_T)del << 1);
750
751 x3r1 = *data;
752 x3i1 = *(data + 1);
753 data -= 3 * (del << 1);
754
755 tmp =
756 (FLOAT32)(ixheaace_dmult((FLOAT64)x1r1, w_1) - ixheaace_dmult((FLOAT64)x1i1, w_4));
757 x1i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r1, w_4), (FLOAT64)x1i1, w_1);
758 x1r1 = tmp;
759
760 tmp =
761 (FLOAT32)(ixheaace_dmult((FLOAT64)x2r1, w_5) + ixheaace_dmult((FLOAT64)x2i1, w_2));
762 x2i1 =
763 (FLOAT32)(-ixheaace_dmult((FLOAT64)x2r1, w_2) + ixheaace_dmult((FLOAT64)x2i1, w_5));
764 x2r1 = tmp;
765
766 tmp =
767 (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r1, w_3) + ixheaace_dmult((FLOAT64)x3i1, w_6));
768 x3i1 = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r1, w_6), (FLOAT64)x3i1, w_3);
769 x3r1 = tmp;
770
771 x0r1 = (*data);
772 x0i1 = (*(data + 1));
773
774 x0r1 = x0r1 + (x2r1);
775 x0i1 = x0i1 + (x2i1);
776 x2r1 = x0r1 - (x2r1 * 2);
777 x2i1 = x0i1 - (x2i1 * 2);
778 x1r1 = x1r1 + x3r1;
779 x1i1 = x1i1 - x3i1;
780 x3r1 = x1r1 - (x3r1 * 2);
781 x3i1 = x1i1 + (x3i1 * 2);
782
783 x0r1 = x0r1 + (x1r1);
784 x0i1 = x0i1 + (x1i1);
785 x1r1 = x0r1 - (x1r1 * 2);
786 x1i1 = x0i1 - (x1i1 * 2);
787 x2r1 = x2r1 + (x3i1);
788 x2i1 = x2i1 - (x3r1);
789 x3i1 = x2r1 - (x3i1 * 2);
790 x3r1 = x2i1 + (x3r1 * 2);
791
792 *data = x0r1;
793 *(data + 1) = x0i1;
794 data += ((SIZE_T)del << 1);
795
796 *data = x2r1;
797 *(data + 1) = x2i1;
798 data += ((SIZE_T)del << 1);
799
800 *data = x1r1;
801 *(data + 1) = x1i1;
802 data += ((SIZE_T)del << 1);
803
804 *data = x3i1;
805 *(data + 1) = x3r1;
806 data += ((SIZE_T)del << 1);
807 }
808 data -= 2 * npoints;
809 data += 2;
810 }
811 nodespacing >>= 2;
812 del <<= 2;
813 in_loop_cnt >>= 2;
814 }
815 if (not_power_4) {
816 const double *ptr_twiddle = ptr_w;
817 nodespacing <<= 1;
818
819 for (j = del / 2; j != 0; j--) {
820 FLOAT64 w_1 = *ptr_twiddle;
821 FLOAT64 w_4 = *(ptr_twiddle + 257);
822 FLOAT32 tmp;
823 ptr_twiddle += nodespacing;
824
825 x0r = *ptr_y;
826 x0i = *(ptr_y + 1);
827 ptr_y += ((SIZE_T)del << 1);
828
829 x1r = *ptr_y;
830 x1i = *(ptr_y + 1);
831
832 tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
833 x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
834 x1r = tmp;
835
836 *ptr_y = (x0r) - (x1r);
837 *(ptr_y + 1) = (x0i) - (x1i);
838 ptr_y -= ((SIZE_T)del << 1);
839
840 *ptr_y = (x0r) + (x1r);
841 *(ptr_y + 1) = (x0i) + (x1i);
842 ptr_y += 2;
843 }
844 ptr_twiddle = ptr_w;
845 for (j = del / 2; j != 0; j--) {
846 FLOAT64 w_1 = *ptr_twiddle;
847 FLOAT64 w_4 = *(ptr_twiddle + 257);
848 FLOAT32 tmp;
849 ptr_twiddle += nodespacing;
850
851 x0r = *ptr_y;
852 x0i = *(ptr_y + 1);
853 ptr_y += ((SIZE_T)del << 1);
854
855 x1r = *ptr_y;
856 x1i = *(ptr_y + 1);
857
858 tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_4) + ixheaace_dmult((FLOAT64)x1i, w_1));
859 x1i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1r, w_1) + ixheaace_dmult((FLOAT64)x1i, w_4));
860 x1r = tmp;
861
862 *ptr_y = (x0r) - (x1r);
863 *(ptr_y + 1) = (x0i) - (x1i);
864 ptr_y -= ((SIZE_T)del << 1);
865
866 *ptr_y = (x0r) + (x1r);
867 *(ptr_y + 1) = (x0i) + (x1i);
868 ptr_y += 2;
869 }
870 }
871 }
872
873 /**********************IFFT******************************************/
874
875 else {
876 for (i = 0; i < npoints; i += 4) {
877 FLOAT32 *ptr_inp = ptr_x;
878
879 DIG_REV(i, dig_rev_shift, h2);
880 if (not_power_4) {
881 h2 += 1;
882 h2 &= ~1;
883 }
884 ptr_inp += (h2);
885
886 x0r = *ptr_inp;
887 x0i = *(ptr_inp + 1);
888 ptr_inp += (npoints >> 1);
889
890 x1r = *ptr_inp;
891 x1i = *(ptr_inp + 1);
892 ptr_inp += (npoints >> 1);
893
894 x2r = *ptr_inp;
895 x2i = *(ptr_inp + 1);
896 ptr_inp += (npoints >> 1);
897
898 x3r = *ptr_inp;
899 x3i = *(ptr_inp + 1);
900
901 x0r = x0r + x2r;
902 x0i = x0i + x2i;
903 x2r = x0r - (x2r * 2);
904 x2i = x0i - (x2i * 2);
905 x1r = x1r + x3r;
906 x1i = x1i + x3i;
907 x3r = x1r - (x3r * 2);
908 x3i = x1i - (x3i * 2);
909
910 x0r = x0r + x1r;
911 x0i = x0i + x1i;
912 x1r = x0r - (x1r * 2);
913 x1i = x0i - (x1i * 2);
914 x2r = x2r - x3i;
915 x2i = x2i + x3r;
916 x3i = x2r + (x3i * 2);
917 x3r = x2i - (x3r * 2);
918
919 *ptr_y++ = x0r;
920 *ptr_y++ = x0i;
921 *ptr_y++ = x2r;
922 *ptr_y++ = x2i;
923 *ptr_y++ = x1r;
924 *ptr_y++ = x1i;
925 *ptr_y++ = x3i;
926 *ptr_y++ = x3r;
927 }
928 ptr_y -= 2 * npoints;
929 del = 4;
930 nodespacing = 64;
931 in_loop_cnt = npoints >> 4;
932 for (i = n_stages - 1; i > 0; i--) {
933 const double *ptr_twiddle = ptr_w;
934 float *data = ptr_y;
935 double w_1, w_2, w_3, w_4, w_5, w_6;
936 int sec_loop_cnt;
937
938 for (k = in_loop_cnt; k != 0; k--) {
939 x0r = (*data);
940 x0i = (*(data + 1));
941 data += ((SIZE_T)del << 1);
942
943 x1r = (*data);
944 x1i = (*(data + 1));
945 data += ((SIZE_T)del << 1);
946
947 x2r = (*data);
948 x2i = (*(data + 1));
949 data += ((SIZE_T)del << 1);
950
951 x3r = (*data);
952 x3i = (*(data + 1));
953 data -= 3 * (del << 1);
954
955 x0r = x0r + x2r;
956 x0i = x0i + x2i;
957 x2r = x0r - (x2r * 2);
958 x2i = x0i - (x2i * 2);
959 x1r = x1r + x3r;
960 x1i = x1i + x3i;
961 x3r = x1r - (x3r * 2);
962 x3i = x1i - (x3i * 2);
963
964 x0r = x0r + x1r;
965 x0i = x0i + x1i;
966 x1r = x0r - (x1r * 2);
967 x1i = x0i - (x1i * 2);
968 x2r = x2r - x3i;
969 x2i = x2i + x3r;
970 x3i = x2r + (x3i * 2);
971 x3r = x2i - (x3r * 2);
972
973 *data = x0r;
974 *(data + 1) = x0i;
975 data += ((SIZE_T)del << 1);
976
977 *data = x2r;
978 *(data + 1) = x2i;
979 data += ((SIZE_T)del << 1);
980
981 *data = x1r;
982 *(data + 1) = x1i;
983 data += ((SIZE_T)del << 1);
984
985 *data = x3i;
986 *(data + 1) = x3r;
987 data += ((SIZE_T)del << 1);
988 }
989 data = ptr_y + 2;
990
991 sec_loop_cnt = (nodespacing * del);
992 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
993 (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
994 (sec_loop_cnt / 256);
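      /* Same ~(nodespacing * del) / 3 split of the twiddle index ranges as in
       * the forward branch above. */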
995
996 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
997 w_1 = *(ptr_twiddle + j);
998 w_4 = *(ptr_twiddle + j + 257);
999 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1));
1000 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 257);
1001 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1));
1002 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) + 257);
1003
1004 for (k = in_loop_cnt; k != 0; k--) {
1005 FLOAT32 tmp;
1006 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
1007 /*x0 is loaded later to avoid register crunch*/
1008
1009 data += ((SIZE_T)del << 1);
1010
1011 x1r1 = *data;
1012 x1i1 = *(data + 1);
1013 data += ((SIZE_T)del << 1);
1014
1015 x2r1 = *data;
1016 x2i1 = *(data + 1);
1017 data += ((SIZE_T)del << 1);
1018
1019 x3r1 = *data;
1020 x3i1 = *(data + 1);
1021 data -= 3 * (del << 1);
1022
1023 tmp = (FLOAT32)(((FLOAT64)x1r1 * w_1) + ((FLOAT64)x1i1 * w_4));
1024 x1i1 = (FLOAT32)(-((FLOAT64)x1r1 * w_4) + (FLOAT64)x1i1 * w_1);
1025 x1r1 = tmp;
1026
1027 tmp = (FLOAT32)(((FLOAT64)x2r1 * w_2) + ((FLOAT64)x2i1 * w_5));
1028 x2i1 = (FLOAT32)(-((FLOAT64)x2r1 * w_5) + (FLOAT64)x2i1 * w_2);
1029 x2r1 = tmp;
1030
1031 tmp = (FLOAT32)(((FLOAT64)x3r1 * w_3) + ((FLOAT64)x3i1 * w_6));
1032 x3i1 = (FLOAT32)(-((FLOAT64)x3r1 * w_6) + (FLOAT64)x3i1 * w_3);
1033 x3r1 = tmp;
1034
1035 x0r1 = (*data);
1036 x0i1 = (*(data + 1));
1037
1038 x0r1 = x0r1 + (x2r1);
1039 x0i1 = x0i1 + (x2i1);
1040 x2r1 = x0r1 - (x2r1 * 2);
1041 x2i1 = x0i1 - (x2i1 * 2);
1042 x1r1 = x1r1 + x3r1;
1043 x1i1 = x1i1 + x3i1;
1044 x3r1 = x1r1 - (x3r1 * 2);
1045 x3i1 = x1i1 - (x3i1 * 2);
1046
1047 x0r1 = x0r1 + (x1r1);
1048 x0i1 = x0i1 + (x1i1);
1049 x1r1 = x0r1 - (x1r1 * 2);
1050 x1i1 = x0i1 - (x1i1 * 2);
1051 x2r1 = x2r1 - (x3i1);
1052 x2i1 = x2i1 + (x3r1);
1053 x3i1 = x2r1 + (x3i1 * 2);
1054 x3r1 = x2i1 - (x3r1 * 2);
1055
1056 *data = x0r1;
1057 *(data + 1) = x0i1;
1058 data += ((SIZE_T)del << 1);
1059
1060 *data = x2r1;
1061 *(data + 1) = x2i1;
1062 data += ((SIZE_T)del << 1);
1063
1064 *data = x1r1;
1065 *(data + 1) = x1i1;
1066 data += ((SIZE_T)del << 1);
1067
1068 *data = x3i1;
1069 *(data + 1) = x3r1;
1070 data += ((SIZE_T)del << 1);
1071 }
1072 data -= 2 * npoints;
1073 data += 2;
1074 }
1075 for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
1076 w_1 = *(ptr_twiddle + j);
1077 w_4 = *(ptr_twiddle + j + 257);
1078 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1));
1079 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 257);
1080 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 256);
1081 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) + 1);
1082
1083 for (k = in_loop_cnt; k != 0; k--) {
1084 FLOAT32 tmp;
1085 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
1086 /*x0 is loaded later to avoid register crunch*/
1087
1088 data += ((SIZE_T)del << 1);
1089
1090 x1r1 = *data;
1091 x1i1 = *(data + 1);
1092 data += ((SIZE_T)del << 1);
1093
1094 x2r1 = *data;
1095 x2i1 = *(data + 1);
1096 data += ((SIZE_T)del << 1);
1097
1098 x3r1 = *data;
1099 x3i1 = *(data + 1);
1100 data -= 3 * (del << 1);
1101
1102 tmp = (FLOAT32)(((FLOAT64)x1r1 * w_1) + ((FLOAT64)x1i1 * w_4));
1103 x1i1 = (FLOAT32)(-((FLOAT64)x1r1 * w_4) + (FLOAT64)x1i1 * w_1);
1104 x1r1 = tmp;
1105
1106 tmp = (FLOAT32)(((FLOAT64)x2r1 * w_2) + ((FLOAT64)x2i1 * w_5));
1107 x2i1 = (FLOAT32)(-((FLOAT64)x2r1 * w_5) + (FLOAT64)x2i1 * w_2);
1108 x2r1 = tmp;
1109
1110 tmp = (FLOAT32)(((FLOAT64)x3r1 * w_6) - ((FLOAT64)x3i1 * w_3));
1111 x3i1 = (FLOAT32)(((FLOAT64)x3r1 * w_3) + ((FLOAT64)x3i1 * w_6));
1112 x3r1 = tmp;
1113
1114 x0r1 = (*data);
1115 x0i1 = (*(data + 1));
1116
1117 x0r1 = x0r1 + (x2r1);
1118 x0i1 = x0i1 + (x2i1);
1119 x2r1 = x0r1 - (x2r1 * 2);
1120 x2i1 = x0i1 - (x2i1 * 2);
1121 x1r1 = x1r1 + x3r1;
1122 x1i1 = x1i1 + x3i1;
1123 x3r1 = x1r1 - (x3r1 * 2);
1124 x3i1 = x1i1 - (x3i1 * 2);
1125
1126 x0r1 = x0r1 + (x1r1);
1127 x0i1 = x0i1 + (x1i1);
1128 x1r1 = x0r1 - (x1r1 * 2);
1129 x1i1 = x0i1 - (x1i1 * 2);
1130 x2r1 = x2r1 - (x3i1);
1131 x2i1 = x2i1 + (x3r1);
1132 x3i1 = x2r1 + (x3i1 * 2);
1133 x3r1 = x2i1 - (x3r1 * 2);
1134
1135 *data = x0r1;
1136 *(data + 1) = x0i1;
1137 data += ((SIZE_T)del << 1);
1138
1139 *data = x2r1;
1140 *(data + 1) = x2i1;
1141 data += ((SIZE_T)del << 1);
1142
1143 *data = x1r1;
1144 *(data + 1) = x1i1;
1145 data += ((SIZE_T)del << 1);
1146
1147 *data = x3i1;
1148 *(data + 1) = x3r1;
1149 data += ((SIZE_T)del << 1);
1150 }
1151 data -= 2 * npoints;
1152 data += 2;
1153 }
1154 for (; j <= sec_loop_cnt * 2; j += nodespacing) {
1155 w_1 = *(ptr_twiddle + j);
1156 w_4 = *(ptr_twiddle + j + 257);
1157 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1) - 256);
1158 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 1);
1159 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 256);
1160 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) + 1);
1161
1162 for (k = in_loop_cnt; k != 0; k--) {
1163 FLOAT32 tmp;
1164 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
1165 /*x0 is loaded later to avoid register crunch*/
1166
1167 data += ((SIZE_T)del << 1);
1168
1169 x1r1 = *data;
1170 x1i1 = *(data + 1);
1171 data += ((SIZE_T)del << 1);
1172
1173 x2r1 = *data;
1174 x2i1 = *(data + 1);
1175 data += ((SIZE_T)del << 1);
1176
1177 x3r1 = *data;
1178 x3i1 = *(data + 1);
1179 data -= 3 * (del << 1);
1180
1181 tmp = (FLOAT32)(((FLOAT64)x1r1 * w_1) + ((FLOAT64)x1i1 * w_4));
1182 x1i1 = (FLOAT32)(-((FLOAT64)x1r1 * w_4) + (FLOAT64)x1i1 * w_1);
1183 x1r1 = tmp;
1184
1185 tmp = (FLOAT32)(((FLOAT64)x2r1 * w_5) - ((FLOAT64)x2i1 * w_2));
1186 x2i1 = (FLOAT32)(((FLOAT64)x2r1 * w_2) + ((FLOAT64)x2i1 * w_5));
1187 x2r1 = tmp;
1188
1189 tmp = (FLOAT32)(((FLOAT64)x3r1 * w_6) - ((FLOAT64)x3i1 * w_3));
1190 x3i1 = (FLOAT32)(((FLOAT64)x3r1 * w_3) + ((FLOAT64)x3i1 * w_6));
1191 x3r1 = tmp;
1192
1193 x0r1 = (*data);
1194 x0i1 = (*(data + 1));
1195
1196 x0r1 = x0r1 + (x2r1);
1197 x0i1 = x0i1 + (x2i1);
1198 x2r1 = x0r1 - (x2r1 * 2);
1199 x2i1 = x0i1 - (x2i1 * 2);
1200 x1r1 = x1r1 + x3r1;
1201 x1i1 = x1i1 + x3i1;
1202 x3r1 = x1r1 - (x3r1 * 2);
1203 x3i1 = x1i1 - (x3i1 * 2);
1204
1205 x0r1 = x0r1 + (x1r1);
1206 x0i1 = x0i1 + (x1i1);
1207 x1r1 = x0r1 - (x1r1 * 2);
1208 x1i1 = x0i1 - (x1i1 * 2);
1209 x2r1 = x2r1 - (x3i1);
1210 x2i1 = x2i1 + (x3r1);
1211 x3i1 = x2r1 + (x3i1 * 2);
1212 x3r1 = x2i1 - (x3r1 * 2);
1213
1214 *data = x0r1;
1215 *(data + 1) = x0i1;
1216 data += ((SIZE_T)del << 1);
1217
1218 *data = x2r1;
1219 *(data + 1) = x2i1;
1220 data += ((SIZE_T)del << 1);
1221
1222 *data = x1r1;
1223 *(data + 1) = x1i1;
1224 data += ((SIZE_T)del << 1);
1225
1226 *data = x3i1;
1227 *(data + 1) = x3r1;
1228 data += ((SIZE_T)del << 1);
1229 }
1230 data -= 2 * npoints;
1231 data += 2;
1232 }
1233 for (; j < nodespacing * del; j += nodespacing) {
1234 w_1 = *(ptr_twiddle + j);
1235 w_4 = *(ptr_twiddle + j + 257);
1236 w_2 = *(ptr_twiddle + ((SIZE_T)j << 1) - 256);
1237 w_5 = *(ptr_twiddle + ((SIZE_T)j << 1) + 1);
1238 w_3 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 512);
1239 w_6 = *(ptr_twiddle + j + ((SIZE_T)j << 1) - 512 + 257);
1240
1241 for (k = in_loop_cnt; k != 0; k--) {
1242 FLOAT32 tmp;
1243 FLOAT32 x0r1, x0i1, x1r1, x1i1, x2r1, x2i1, x3r1, x3i1;
1244 /*x0 is loaded later to avoid register crunch*/
1245
1246 data += ((SIZE_T)del << 1);
1247
1248 x1r1 = *data;
1249 x1i1 = *(data + 1);
1250 data += ((SIZE_T)del << 1);
1251
1252 x2r1 = *data;
1253 x2i1 = *(data + 1);
1254 data += ((SIZE_T)del << 1);
1255
1256 x3r1 = *data;
1257 x3i1 = *(data + 1);
1258 data -= 3 * (del << 1);
1259
1260 tmp = (FLOAT32)(((FLOAT64)x1r1 * w_1) + ((FLOAT64)x1i1 * w_4));
1261 x1i1 = (FLOAT32)(-((FLOAT64)x1r1 * w_4) + (FLOAT64)x1i1 * w_1);
1262 x1r1 = tmp;
1263
1264 tmp = (FLOAT32)(((FLOAT64)x2r1 * w_5) - ((FLOAT64)x2i1 * w_2));
1265 x2i1 = (FLOAT32)(((FLOAT64)x2r1 * w_2) + ((FLOAT64)x2i1 * w_5));
1266 x2r1 = tmp;
1267
1268 tmp = (FLOAT32)(-((FLOAT64)x3r1 * w_3) - ((FLOAT64)x3i1 * w_6));
1269 x3i1 = (FLOAT32)(-((FLOAT64)x3r1 * w_6) + (FLOAT64)x3i1 * w_3);
1270 x3r1 = tmp;
1271
1272 x0r1 = (*data);
1273 x0i1 = (*(data + 1));
1274
1275 x0r1 = x0r1 + (x2r1);
1276 x0i1 = x0i1 + (x2i1);
1277 x2r1 = x0r1 - (x2r1 * 2);
1278 x2i1 = x0i1 - (x2i1 * 2);
1279 x1r1 = x1r1 + x3r1;
1280 x1i1 = x1i1 - x3i1;
1281 x3r1 = x1r1 - (x3r1 * 2);
1282 x3i1 = x1i1 + (x3i1 * 2);
1283
1284 x0r1 = x0r1 + (x1r1);
1285 x0i1 = x0i1 + (x1i1);
1286 x1r1 = x0r1 - (x1r1 * 2);
1287 x1i1 = x0i1 - (x1i1 * 2);
1288 x2r1 = x2r1 - (x3i1);
1289 x2i1 = x2i1 + (x3r1);
1290 x3i1 = x2r1 + (x3i1 * 2);
1291 x3r1 = x2i1 - (x3r1 * 2);
1292
1293 *data = x0r1;
1294 *(data + 1) = x0i1;
1295 data += ((SIZE_T)del << 1);
1296
1297 *data = x2r1;
1298 *(data + 1) = x2i1;
1299 data += ((SIZE_T)del << 1);
1300
1301 *data = x1r1;
1302 *(data + 1) = x1i1;
1303 data += ((SIZE_T)del << 1);
1304
1305 *data = x3i1;
1306 *(data + 1) = x3r1;
1307 data += ((SIZE_T)del << 1);
1308 }
1309 data -= 2 * npoints;
1310 data += 2;
1311 }
1312 nodespacing >>= 2;
1313 del <<= 2;
1314 in_loop_cnt >>= 2;
1315 }
1316
1317 if (not_power_4) {
1318 const FLOAT64 *ptr_twiddle = ptr_w;
1319 nodespacing <<= 1;
1320
1321 for (j = del / 2; j != 0; j--) {
1322 FLOAT64 w_1 = *ptr_twiddle;
1323 FLOAT64 w_4 = *(ptr_twiddle + 257);
1324 FLOAT32 tmp;
1325 ptr_twiddle += nodespacing;
1326
1327 x0r = *ptr_y;
1328 x0i = *(ptr_y + 1);
1329 ptr_y += ((SIZE_T)del << 1);
1330
1331 x1r = *ptr_y;
1332 x1i = *(ptr_y + 1);
1333
1334 tmp = (FLOAT32)(((FLOAT64)x1r * w_1) + ((FLOAT64)x1i * w_4));
1335 x1i = (FLOAT32)(-((FLOAT64)x1r * w_4) + (FLOAT64)x1i * w_1);
1336 x1r = tmp;
1337
1338 *ptr_y = (x0r) - (x1r);
1339 *(ptr_y + 1) = (x0i) - (x1i);
1340 ptr_y -= ((SIZE_T)del << 1);
1341
1342 *ptr_y = (x0r) + (x1r);
1343 *(ptr_y + 1) = (x0i) + (x1i);
1344 ptr_y += 2;
1345 }
1346 ptr_twiddle = ptr_w;
1347 for (j = del / 2; j != 0; j--) {
1348 FLOAT64 w_1 = *ptr_twiddle;
1349 FLOAT64 w_4 = *(ptr_twiddle + 257);
1350 FLOAT32 tmp;
1351 ptr_twiddle += nodespacing;
1352
1353 x0r = *ptr_y;
1354 x0i = *(ptr_y + 1);
1355 ptr_y += ((SIZE_T)del << 1);
1356
1357 x1r = *ptr_y;
1358 x1i = *(ptr_y + 1);
1359
1360 tmp = (FLOAT32)(((FLOAT64)x1r * w_4) - ((FLOAT64)x1i * w_1));
1361 x1i = (FLOAT32)(((FLOAT64)x1r * w_1) + ((FLOAT64)x1i * w_4));
1362 x1r = tmp;
1363
1364 *ptr_y = (x0r) - (x1r);
1365 *(ptr_y + 1) = (x0i) - (x1i);
1366 ptr_y -= ((SIZE_T)del << 1);
1367
1368 *ptr_y = (x0r) + (x1r);
1369 *(ptr_y + 1) = (x0i) + (x1i);
1370 ptr_y += 2;
1371 }
1372 }
1373 }
1374
1375 for (i = 0; i < n_pass; i++) {
1376 re[2 * i + 0] = y[2 * i + 0];
1377 re[2 * i + 1] = y[2 * i + 1];
1378 }
1379 }
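#if 0
/* Illustrative usage sketch only, kept out of the build: a forward 64-point
 * FFT on interleaved re/im data. The buffer sizes follow from the code above:
 * the result is staged at ptr_scratch + 2 * n_pass, so the scratch area must
 * provide at least 4 * n_pass FLOAT32 values. */
{
  FLOAT32 buf[2 * 64];     /* interleaved re/im input, overwritten with the FFT */
  FLOAT32 scratch[4 * 64]; /* work area */
  /* ... fill buf ... */
  ixheaace_hbe_apply_cfftn(buf, scratch, 64, -1); /* i_sign = -1 -> forward */
}
#endif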
1380
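/* Mixed-radix wrapper used for lengths with a single factor of 3
 * (n_pass = 3 * 2^m, e.g. 48 and 96 here): the input is split into three
 * interleaved sequences transformed with ixheaace_hbe_apply_cfftn, multiplied
 * by the ixheaac_twid_tbl_fft_ntwt3r/i twiddles, and recombined with 3-point
 * transforms (ixheaace_hbe_apply_fft_3). */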
1381 VOID ixheaace_hbe_apply_cfftn_gen(FLOAT32 in[], FLOAT32 *ptr_scratch, WORD32 n_pass,
1382 WORD32 i_sign) {
1383 WORD32 i, j;
1384 WORD32 m_points = n_pass;
1385 FLOAT32 *y, *re_3;
1386 FLOAT32 *ptr_x, *ptr_y;
1387 ptr_x = ptr_scratch;
1388 ptr_scratch += 2 * m_points;
1389 ptr_y = y = ptr_scratch;
1390 ptr_scratch += 4 * m_points;
1391 re_3 = ptr_scratch;
1392 ptr_scratch += 2 * m_points;
1393 WORD32 cnfac;
1394 WORD32 mpass = n_pass;
1395
1396 cnfac = 0;
1397 while (mpass % 3 == 0) {
1398 mpass /= 3;
1399 cnfac++;
1400 }
1401
1402 for (i = 0; i < 3 * cnfac; i++) {
1403 for (j = 0; j < mpass; j++) {
1404 re_3[2 * j + 0] = in[6 * j + 2 * i + 0];
1405 re_3[2 * j + 1] = in[6 * j + 2 * i + 1];
1406 }
1407
1408 ixheaace_hbe_apply_cfftn(re_3, ptr_scratch, mpass, i_sign);
1409
1410 for (j = 0; j < mpass; j++) {
1411 in[6 * j + 2 * i + 0] = re_3[2 * j + 0];
1412 in[6 * j + 2 * i + 1] = re_3[2 * j + 1];
1413 }
1414 }
1415
1416 {
1417 FLOAT64 *ptr_w1r, *ptr_w1i;
1418 FLOAT32 tmp;
1419 ptr_w1r = (FLOAT64 *)ixheaac_twid_tbl_fft_ntwt3r;
1420 ptr_w1i = (FLOAT64 *)ixheaac_twid_tbl_fft_ntwt3i;
1421
1422 if (i_sign < 0) {
1423 i = 0;
1424 while (i < n_pass) {
1425 tmp =
1426 (FLOAT32)((FLOAT64)in[2 * i + 0] * (*ptr_w1r) - (FLOAT64)in[2 * i + 1] * (*ptr_w1i));
1427 in[2 * i + 1] =
1428 (FLOAT32)((FLOAT64)in[2 * i + 0] * (*ptr_w1i) + (FLOAT64)in[2 * i + 1] * (*ptr_w1r));
1429 in[2 * i + 0] = tmp;
1430
1431 ptr_w1r++;
1432 ptr_w1i++;
1433
1434 tmp =
1435 (FLOAT32)((FLOAT64)in[2 * i + 2] * (*ptr_w1r) - (FLOAT64)in[2 * i + 3] * (*ptr_w1i));
1436 in[2 * i + 3] =
1437 (FLOAT32)((FLOAT64)in[2 * i + 2] * (*ptr_w1i) + (FLOAT64)in[2 * i + 3] * (*ptr_w1r));
1438 in[2 * i + 2] = tmp;
1439
1440 ptr_w1r++;
1441 ptr_w1i++;
1442
1443 tmp =
1444 (FLOAT32)((FLOAT64)in[2 * i + 4] * (*ptr_w1r) - (FLOAT64)in[2 * i + 5] * (*ptr_w1i));
1445 in[2 * i + 5] =
1446 (FLOAT32)((FLOAT64)in[2 * i + 4] * (*ptr_w1i) + (FLOAT64)in[2 * i + 5] * (*ptr_w1r));
1447 in[2 * i + 4] = tmp;
1448
1449 ptr_w1r += 3 * (128 / mpass - 1) + 1;
1450 ptr_w1i += 3 * (128 / mpass - 1) + 1;
1451 i += 3;
1452 }
1453 }
1454
1455 else {
1456 i = 0;
1457 while (i < n_pass) {
1458 tmp =
1459 (FLOAT32)((FLOAT64)in[2 * i + 0] * (*ptr_w1r) + (FLOAT64)in[2 * i + 1] * (*ptr_w1i));
1460 in[2 * i + 1] =
1461 (FLOAT32)(-(FLOAT64)in[2 * i + 0] * (*ptr_w1i) + (FLOAT64)in[2 * i + 1] * (*ptr_w1r));
1462 in[2 * i + 0] = tmp;
1463
1464 ptr_w1r++;
1465 ptr_w1i++;
1466
1467 tmp =
1468 (FLOAT32)((FLOAT64)in[2 * i + 2] * (*ptr_w1r) + (FLOAT64)in[2 * i + 3] * (*ptr_w1i));
1469 in[2 * i + 3] =
1470 (FLOAT32)(-(FLOAT64)in[2 * i + 2] * (*ptr_w1i) + (FLOAT64)in[2 * i + 3] * (*ptr_w1r));
1471 in[2 * i + 2] = tmp;
1472
1473 ptr_w1r++;
1474 ptr_w1i++;
1475
1476 tmp =
1477 (FLOAT32)((FLOAT64)in[2 * i + 4] * (*ptr_w1r) + (FLOAT64)in[2 * i + 5] * (*ptr_w1i));
1478 in[2 * i + 5] =
1479 (FLOAT32)(-(FLOAT64)in[2 * i + 4] * (*ptr_w1i) + (FLOAT64)in[2 * i + 5] * (*ptr_w1r));
1480 in[2 * i + 4] = tmp;
1481
1482 ptr_w1r += 3 * (128 / mpass - 1) + 1;
1483 ptr_w1i += 3 * (128 / mpass - 1) + 1;
1484 i += 3;
1485 }
1486 }
1487 }
1488
1489 for (i = 0; i < n_pass; i++) {
1490 ptr_x[2 * i + 0] = in[2 * i + 0];
1491 ptr_x[2 * i + 1] = in[2 * i + 1];
1492 }
1493 for (i = 0; i < mpass; i++) {
1494 ixheaace_hbe_apply_fft_3(ptr_x, ptr_y, i_sign);
1495
1496 ptr_x = ptr_x + 6;
1497 ptr_y = ptr_y + 6;
1498 }
1499
1500 for (i = 0; i < mpass; i++) {
1501 in[2 * i + 0] = y[6 * i + 0];
1502 in[2 * i + 1] = y[6 * i + 1];
1503 }
1504
1505 for (i = 0; i < mpass; i++) {
1506 in[2 * mpass + 2 * i + 0] = y[6 * i + 2];
1507 in[2 * mpass + 2 * i + 1] = y[6 * i + 3];
1508 }
1509
1510 for (i = 0; i < mpass; i++) {
1511 in[4 * mpass + 2 * i + 0] = y[6 * i + 4];
1512 in[4 * mpass + 2 * i + 1] = y[6 * i + 5];
1513 }
1514 }
1515
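/* The composite transforms below all use the standard mixed-radix
 * (Cooley-Tukey) split for N = N1 * N2: writing n = N2*n1 + n2 and
 * k = k1 + N1*k2,
 *
 *   X[k1 + N1*k2] =
 *     sum_n2 W_N^(n2*k1) * ( sum_n1 x[N2*n1 + n2] * W_N1^(n1*k1) ) * W_N2^(n2*k2)
 *
 * i.e. N2 transforms of length N1 over the decimated columns, a pointwise
 * twiddle multiplication by W_N^(n2*k1), N1 transforms of length N2, and a
 * final reordering of the outputs. */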
1516 VOID ixheaace_hbe_apply_fft_288(FLOAT32 *ptr_inp, FLOAT32 *ptr_scratch, WORD32 len,
1517 WORD32 i_sign) {
1518   /* Dividing the 288-point FFT into 96 x 3, i.e. n x 3 */
1519 FLOAT32 *ptr_op = ptr_scratch;
1520 WORD32 mpoints = len / 96;
1521 WORD32 fpoints = len / 3;
1522 WORD32 ii, jj;
1523 ptr_scratch += 2 * len;
1524
1525 for (ii = 0; ii < mpoints; ii++) {
1526 for (jj = 0; jj < fpoints; jj++) {
1527 ptr_op[2 * jj + 0] = ptr_inp[2 * mpoints * jj + 2 * ii];
1528 ptr_op[2 * jj + 1] = ptr_inp[2 * mpoints * jj + 2 * ii + 1];
1529 }
1530
1531     /* 96-point (32 x 3) FFT */
1532 if (fpoints & (fpoints - 1))
1533 ixheaace_hbe_apply_cfftn_gen(ptr_op, ptr_scratch, fpoints, i_sign);
1534 else
1535 ixheaace_hbe_apply_cfftn(ptr_op, ptr_scratch, fpoints, i_sign);
1536
1537 for (jj = 0; jj < fpoints; jj++) {
1538 ptr_inp[mpoints * 2 * jj + 2 * ii + 0] = ptr_op[2 * jj + 0];
1539 ptr_inp[mpoints * 2 * jj + 2 * ii + 1] = ptr_op[2 * jj + 1];
1540 }
1541 }
1542
1543   /* Multiply the sub-FFT outputs by the twiddle table */
1544 ixheaace_hbe_apply_tw_mult_fft(ptr_inp, ptr_op, fpoints, mpoints, ixheaac_twid_tbl_fft_288);
1545
1546 for (ii = 0; ii < fpoints; ii++) {
1547     /* 3-point FFT */
1548 ixheaace_hbe_apply_fft_3(ptr_op, ptr_scratch, i_sign);
1549 ptr_op = ptr_op + (mpoints * 2);
1550 ptr_scratch = ptr_scratch + (mpoints * 2);
1551 }
1552
1553 ptr_scratch -= fpoints * mpoints * 2;
1554
1555 for (jj = 0; jj < fpoints; jj++) {
1556 ptr_inp[2 * jj + 0] = ptr_scratch[6 * jj];
1557 ptr_inp[2 * jj + 1] = ptr_scratch[6 * jj + 1];
1558 }
1559 for (jj = 0; jj < fpoints; jj++) {
1560 ptr_inp[2 * fpoints + 2 * jj + 0] = ptr_scratch[6 * jj + 2];
1561 ptr_inp[2 * fpoints + 2 * jj + 1] = ptr_scratch[6 * jj + 3];
1562 }
1563 for (jj = 0; jj < fpoints; jj++) {
1564 ptr_inp[4 * fpoints + 2 * jj + 0] = ptr_scratch[6 * jj + 4];
1565 ptr_inp[4 * fpoints + 2 * jj + 1] = ptr_scratch[6 * jj + 5];
1566 }
1567 }
1568
1569 VOID ixheaace_hbe_apply_ifft_224(FLOAT32 *ptr_inp, FLOAT32 *ptr_scratch, WORD32 len,
1570 WORD32 i_sign) {
1571 /* Dividing 224-point IFFT into 32x7 */
1572 WORD32 mpoints = len / 32;
1573 WORD32 fpoints = len / 7;
1574 WORD32 ii, jj;
1575 FLOAT32 *ptr_op = ptr_scratch;
1576 ptr_scratch += 2 * len;
1577
1578 for (ii = 0; ii < mpoints; ii++) {
1579 for (jj = 0; jj < fpoints; jj++) {
1580 ptr_op[2 * jj + 0] = ptr_inp[2 * mpoints * jj + 2 * ii];
1581 ptr_op[2 * jj + 1] = ptr_inp[2 * mpoints * jj + 2 * ii + 1];
1582 }
1583
1584     /* 32-point IFFT */
1585 if (fpoints & (fpoints - 1))
1586 ixheaace_hbe_apply_cfftn_gen(ptr_op, ptr_scratch, fpoints, i_sign);
1587 else
1588 ixheaace_hbe_apply_cfftn(ptr_op, ptr_scratch, fpoints, i_sign);
1589
1590 for (jj = 0; jj < fpoints; jj++) {
1591 ptr_inp[mpoints * 2 * jj + 2 * ii + 0] = ptr_op[2 * jj + 0];
1592 ptr_inp[mpoints * 2 * jj + 2 * ii + 1] = ptr_op[2 * jj + 1];
1593 }
1594 }
1595
1596   /* Multiply the sub-IFFT outputs by the twiddle table */
1597 ixheaace_hbe_apply_tw_mult_ifft(ptr_inp, ptr_op, fpoints, mpoints, ixheaac_twid_tbl_fft_224);
1598
1599 for (ii = 0; ii < fpoints; ii++) {
1600     /* 7-point IFFT */
1601 ixheaace_hbe_apply_ifft_7(ptr_op, ptr_scratch);
1602 ptr_scratch += (mpoints * 2);
1603 ptr_op += (mpoints * 2);
1604 }
1605
1606 ptr_scratch -= fpoints * mpoints * 2;
1607
1608 for (jj = 0; jj < fpoints; jj++) {
1609 for (ii = 0; ii < mpoints; ii++) {
1610 ptr_inp[fpoints * ii * 2 + 2 * jj + 0] = ptr_scratch[mpoints * jj * 2 + 2 * ii + 0];
1611 ptr_inp[fpoints * ii * 2 + 2 * jj + 1] = ptr_scratch[mpoints * jj * 2 + 2 * ii + 1];
1612 }
1613 }
1614 }
1615
1616 VOID ixheaace_hbe_apply_ifft_336(FLOAT32 *ptr_inp, FLOAT32 *ptr_scratch, WORD32 len,
1617 WORD32 i_sign) {
1618 WORD32 i, j;
1619 WORD32 m_points = len / 7;
1620 WORD32 n_points = len / 48;
1621 FLOAT32 *ptr_real, *ptr_imag, *ptr_real_1, *ptr_scratch_local;
1622 ptr_real = ptr_scratch;
1623 ptr_scratch += 2 * len;
1624 ptr_imag = ptr_scratch;
1625 ptr_scratch += len;
1626 ptr_scratch_local = ptr_scratch;
1627 ptr_scratch += len;
1628 ptr_real_1 = ptr_scratch;
1629 ptr_scratch += len;
1630
1631 for (i = 0; i < len; i++) {
1632 ptr_real[i] = ptr_inp[2 * i + 0];
1633 ptr_imag[i] = ptr_inp[2 * i + 1];
1634 }
1635
1636 for (i = 0; i < m_points; i++) {
1637 for (j = 0; j < n_points; j++) {
1638 ptr_real_1[2 * j + 0] = ptr_inp[m_points * 2 * j + 2 * i + 0];
1639 ptr_real_1[2 * j + 1] = ptr_inp[m_points * 2 * j + 2 * i + 1];
1640 }
1641
1642 ixheaace_hbe_apply_ifft_7(ptr_real_1, ptr_scratch);
1643
1644 for (j = 0; j < n_points; j++) {
1645 ptr_inp[m_points * 2 * j + 2 * i + 0] = ptr_scratch[2 * j + 0];
1646 ptr_inp[m_points * 2 * j + 2 * i + 1] = ptr_scratch[2 * j + 1];
1647 }
1648 }
1649
1650 switch (m_points) {
1651 case 48:
1652 ixheaace_hbe_apply_tw_mult_ifft(ptr_inp, ptr_scratch_local, n_points, m_points,
1653 ixheaac_twid_tbl_fft_336);
1654 break;
1655
1656 default:
1657 ixheaace_hbe_apply_tw_mult_ifft(ptr_inp, ptr_scratch_local, n_points, m_points,
1658 ixheaac_twid_tbl_fft_168);
1659 break;
1660 }
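  /* m_points == 48 is the 336-point case (336 / 7 = 48); the default branch
   * selects the 168-point twiddle table, so this routine presumably also
   * serves len = 168 (m_points == 24). */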
1661 for (i = 0; i < len; i++) {
1662 ptr_real[2 * i + 0] = ptr_scratch_local[2 * i + 0];
1663 ptr_real[2 * i + 1] = ptr_scratch_local[2 * i + 1];
1664 }
1665
1666 for (i = 0; i < n_points; i++) {
1667 ixheaace_hbe_apply_cfftn_gen(ptr_real, ptr_scratch, m_points, i_sign);
1668 ptr_real += (2 * m_points);
1669 }
1670
1671 ptr_real -= n_points * 2 * m_points;
1672
1673 for (j = 0; j < n_points; j++) {
1674 for (i = 0; i < m_points; i++) {
1675 ptr_inp[n_points * 2 * i + 2 * j + 0] = ptr_real[2 * m_points * j + 2 * i + 0];
1676 ptr_inp[n_points * 2 * i + 2 * j + 1] = ptr_real[2 * m_points * j + 2 * i + 1];
1677 }
1678 }
1679 }
1680