xref: /aosp_15_r20/external/libxaac/encoder/ixheaace_fft.c (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451)
1 /******************************************************************************
2  *                                                                            *
3  * Copyright (C) 2023 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 #include <string.h>
22 
23 #include "ixheaac_type_def.h"
24 #include "ixheaac_constants.h"
25 #include "ixheaace_psy_const.h"
26 #include "ixheaace_tns.h"
27 #include "ixheaace_tns_params.h"
28 #include "ixheaace_rom.h"
29 #include "ixheaace_common_rom.h"
30 #include "ixheaace_bitbuffer.h"
31 #include "ixheaace_aac_constants.h"
32 #include "ixheaace_fft.h"
33 #include "ixheaac_basic_ops32.h"
34 #include "ixheaac_basic_ops40.h"
35 #include "ixheaac_basic_ops.h"
36 #include "iusace_basic_ops_flt.h"
37 
ia_enhaacplus_enc_shift_mdct_delay_buffer(FLOAT32 * ptr_mdct_delay_buffer,const FLOAT32 * ptr_time_signal,WORD32 ch_increment,WORD32 long_frame_len)38 static VOID ia_enhaacplus_enc_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
39                                                       const FLOAT32 *ptr_time_signal,
40                                                       WORD32 ch_increment,
41                                                       WORD32 long_frame_len) {
42   WORD32 i;
43   FLOAT32 *ptr_mdct_buff = ptr_mdct_delay_buffer;
44   if (ch_increment == 2) {
45     const FLOAT32 *ptr_input = ptr_time_signal;
46     FLOAT32 temp1, temp2, temp3, temp4;
47     temp1 = *ptr_input++;
48     ptr_input++;
49     temp2 = *ptr_input++;
50     ptr_input++;
51     temp3 = *ptr_input++;
52     ptr_input++;
53     for (i = ((long_frame_len >> 2) - 2); i >= 0; i--) {
54       *ptr_mdct_buff++ = temp1;
55       temp4 = *ptr_input++;
56       ptr_input++;
57 
58       *ptr_mdct_buff++ = temp2;
59       *ptr_mdct_buff++ = temp3;
60       *ptr_mdct_buff++ = temp4;
61 
62       temp1 = *ptr_input++;
63       ptr_input++;
64       temp2 = *ptr_input++;
65       ptr_input++;
66       temp3 = *ptr_input++;
67       ptr_input++;
68     }
69     *ptr_mdct_buff++ = temp1;
70     temp4 = *ptr_input;
71     *ptr_mdct_buff++ = temp2;
72     *ptr_mdct_buff++ = temp3;
73     *ptr_mdct_buff++ = temp4;
74   } else {
75     for (i = 0; i < long_frame_len; i += 2) {
76       *ptr_mdct_buff++ = ptr_time_signal[i * ch_increment];
77       *ptr_mdct_buff++ = ptr_time_signal[(i + 1) * ch_increment];
78     }
79   }
80 }
81 
ia_eaacp_enc_inverse_transform_512(FLOAT32 * ptr_data,FLOAT32 * ptr_win_buf,const FLOAT32 * ptr_cos_sin_tbl,WORD8 * ptr_scratch)82 static VOID ia_eaacp_enc_inverse_transform_512(FLOAT32 *ptr_data, FLOAT32 *ptr_win_buf,
83                                                const FLOAT32 *ptr_cos_sin_tbl,
84                                                WORD8 *ptr_scratch) {
85   WORD32 n = FRAME_LEN_512;
86   WORD32 n_by_2 = n >> 1;
87 
88   ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
89 
90   ia_eaacp_enc_pre_twiddle_aac(ptr_win_buf, ptr_data, n, ptr_cos_sin_tbl);
91 
92   ia_enhaacplus_enc_complex_fft(ptr_win_buf, n_by_2, pstr_scratch);
93 
94   ia_enhaacplus_enc_post_twiddle(ptr_data, ptr_win_buf, ptr_cos_sin_tbl, n);
95 }
96 
ixheaace_pre_mdct(FLOAT32 * ptr_x,WORD32 m,const FLOAT32 * ptr_sine_window)97 static VOID ixheaace_pre_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_sine_window) {
98   WORD32 i;
99   FLOAT32 wre, wim, re1, re2, im1, im2;
100 
101   for (i = 0; i < m / 4; i++) {
102     re1 = ptr_x[2 * i];
103     im2 = ptr_x[2 * i + 1];
104     re2 = ptr_x[m - 2 - 2 * i];
105     im1 = ptr_x[m - 1 - 2 * i];
106 
107     wim = ptr_sine_window[i * 2];
108     wre = ptr_sine_window[m - 1 - 2 * i];
109 
110     ptr_x[2 * i] = im1 * wim + re1 * wre;
111 
112     ptr_x[2 * i + 1] = im1 * wre - re1 * wim;
113 
114     wim = ptr_sine_window[m - 2 - 2 * i];
115     wre = ptr_sine_window[2 * i + 1];
116 
117     ptr_x[m - 2 - 2 * i] = im2 * wim + re2 * wre;
118 
119     ptr_x[m - 1 - 2 * i] = im2 * wre - re2 * wim;
120   }
121 }
122 
ia_enhaacplus_enc_tranform_mac4(FLOAT32 * ptr_op,const FLOAT32 * ptr_win,FLOAT32 * ptr_buf1,FLOAT32 * ptr_buf2,FLOAT32 * ptr_buf3,FLOAT32 * ptr_buf4,UWORD32 len,WORD32 increment)123 static VOID ia_enhaacplus_enc_tranform_mac4(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
124                                             FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
125                                             FLOAT32 *ptr_buf3, FLOAT32 *ptr_buf4, UWORD32 len,
126                                             WORD32 increment) {
127   WORD32 i;
128 
129   if (increment > 0) {
130     for (i = len >> 2; i > 0; i--) {
131       *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
132       *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
133       *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
134       ptr_op++;
135 
136       *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
137       *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
138       *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
139       ptr_op++;
140 
141       *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
142       *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
143       *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
144       ptr_op++;
145 
146       *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
147       *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
148       *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
149       ptr_op++;
150       ptr_win += 16;
151     }
152   } else {
153     for (i = len >> 2; i > 0; i--) {
154       *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2++)));
155       *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
156       *ptr_op = (*ptr_op + (ptr_win[3] * (*ptr_buf4--)));
157       ptr_op--;
158 
159       *ptr_op = ((ptr_win[4] * (*ptr_buf1++)) + (ptr_win[5] * (*ptr_buf2++)));
160       *ptr_op = (*ptr_op + (ptr_win[6] * (*ptr_buf3--)));
161       *ptr_op = (*ptr_op + (ptr_win[7] * (*ptr_buf4--)));
162       ptr_op--;
163 
164       *ptr_op = ((ptr_win[8] * (*ptr_buf1++)) + (ptr_win[9] * (*ptr_buf2++)));
165       *ptr_op = (*ptr_op + (ptr_win[10] * (*ptr_buf3--)));
166       *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf4--)));
167       ptr_op--;
168 
169       *ptr_op = ((ptr_win[12] * (*ptr_buf1++)) + (ptr_win[13] * (*ptr_buf2++)));
170       *ptr_op = (*ptr_op + (ptr_win[14] * (*ptr_buf3--)));
171       *ptr_op = (*ptr_op + (ptr_win[15] * (*ptr_buf4--)));
172       ptr_op--;
173       ptr_win += 16;
174     }
175   }
176 }
177 
ia_enhaacplus_enc_tranform_mac3(FLOAT32 * ptr_op,const FLOAT32 * ptr_win,FLOAT32 * ptr_buf1,FLOAT32 * ptr_buf2,FLOAT32 * ptr_buf3,UWORD32 len,WORD32 increment)178 static VOID ia_enhaacplus_enc_tranform_mac3(FLOAT32 *ptr_op, const FLOAT32 *ptr_win,
179                                             FLOAT32 *ptr_buf1, FLOAT32 *ptr_buf2,
180                                             FLOAT32 *ptr_buf3, UWORD32 len, WORD32 increment) {
181   WORD32 i;
182 
183   if (increment > 0) {
184     for (i = len >> 2; i > 0; i--) {
185       *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
186       *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
187       ptr_op++;
188 
189       *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
190       *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
191       ptr_op++;
192 
193       *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
194       *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
195       ptr_op++;
196 
197       *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
198       *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
199       ptr_op++;
200       ptr_win += 12;
201     }
202   } else {
203     for (i = len >> 2; i > 0; i--) {
204       *ptr_op = ((ptr_win[0] * (*ptr_buf1++)) + (ptr_win[1] * (*ptr_buf2--)));
205       *ptr_op = (*ptr_op + (ptr_win[2] * (*ptr_buf3--)));
206       ptr_op--;
207 
208       *ptr_op = ((ptr_win[3] * (*ptr_buf1++)) + (ptr_win[4] * (*ptr_buf2--)));
209       *ptr_op = (*ptr_op + (ptr_win[5] * (*ptr_buf3--)));
210       ptr_op--;
211 
212       *ptr_op = ((ptr_win[6] * (*ptr_buf1++)) + (ptr_win[7] * (*ptr_buf2--)));
213       *ptr_op = (*ptr_op + (ptr_win[8] * (*ptr_buf3--)));
214       ptr_op--;
215 
216       *ptr_op = ((ptr_win[9] * (*ptr_buf1++)) + (ptr_win[10] * (*ptr_buf2--)));
217       *ptr_op = (*ptr_op + (ptr_win[11] * (*ptr_buf3--)));
218       ptr_op--;
219       ptr_win += 12;
220     }
221   }
222 }
223 
ia_enhaacplus_enc_transform_real(FLOAT32 * ptr_mdct_delay_buffer,const FLOAT32 * ptr_time_signal,WORD32 ch_increment,FLOAT32 * ptr_real_out,ixheaace_mdct_tables * pstr_mdct_tab,FLOAT32 * ptr_shared_buffer1,WORD8 * ptr_shared_buffer5,WORD32 long_frame_len)224 VOID ia_enhaacplus_enc_transform_real(FLOAT32 *ptr_mdct_delay_buffer,
225                                       const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
226                                       FLOAT32 *ptr_real_out, ixheaace_mdct_tables *pstr_mdct_tab,
227                                       FLOAT32 *ptr_shared_buffer1, WORD8 *ptr_shared_buffer5,
228                                       WORD32 long_frame_len) {
229   WORD32 n, n1;
230   FLOAT32 *ptr_windowed_buf = ptr_shared_buffer1;
231   const FLOAT32 *ptr_ws1;
232   WORD32 i, len = long_frame_len;
233   FLOAT32 *ptr_real_in;
234   FLOAT32 *ptr_data1, *ptr_data2, *ptr_data3, *ptr_data4;
235   FLOAT32 *ptr_op1;
236 
237   ptr_real_in = ptr_mdct_delay_buffer;
238 
239   n = long_frame_len << 1;
240   n1 = long_frame_len >> 1;
241 
242   ptr_ws1 =
243       (long_frame_len == FRAME_LEN_512) ? pstr_mdct_tab->win_512_ld : pstr_mdct_tab->win_480_ld;
244 
245   ptr_op1 = ptr_real_out;
246   ptr_data1 = &ptr_real_in[n1];
247   ptr_data2 = &ptr_real_in[n + n1];
248   ptr_data3 = &ptr_real_in[n1 - 1];
249   ptr_data4 = &ptr_real_in[n + n1 - 1];
250 
251   ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
252                                   n1, 1);
253   ptr_ws1 += ((SIZE_T)n1 << 2);
254 
255   for (i = 0; i < long_frame_len << 1; i++) {
256     ptr_mdct_delay_buffer[i] = ptr_mdct_delay_buffer[long_frame_len + i];
257   }
258   ia_enhaacplus_enc_shift_mdct_delay_buffer(&ptr_mdct_delay_buffer[2 * long_frame_len],
259                                             ptr_time_signal, ch_increment, long_frame_len);
260 
261   ptr_op1 = &ptr_real_out[long_frame_len - 1];
262   ptr_data1 = &ptr_real_in[n + len - n1];
263   ptr_data2 = &ptr_real_in[len - n1];
264   ptr_data3 = &ptr_real_in[len - n1 - 1];
265   ptr_data4 = &ptr_real_in[n + len - n1 - 1];
266 
267   ia_enhaacplus_enc_tranform_mac4(ptr_op1, ptr_ws1, ptr_data1, ptr_data2, ptr_data3, ptr_data4,
268                                   (n1 >> 1), -1);
269   ptr_op1 -= (n1 >> 1);
270   ptr_ws1 += ((SIZE_T)n1 << 1);
271   ptr_data2 += (n1 >> 1);
272   ptr_data3 -= (n1 >> 1);
273   ptr_data4 -= (n1 >> 1);
274   ia_enhaacplus_enc_tranform_mac3(ptr_op1, ptr_ws1, ptr_data2, ptr_data3, ptr_data4, (n1 >> 1),
275                                   -1);
276 
277   if (long_frame_len == FRAME_LEN_480) {
278     ia_aac_ld_enc_mdct_480(ptr_real_out, ptr_windowed_buf, 1, pstr_mdct_tab);
279   } else {
280     ia_eaacp_enc_inverse_transform_512(ptr_real_out, ptr_windowed_buf,
281                                        pstr_mdct_tab->cosine_array_1024, ptr_shared_buffer5);
282   }
283 }
284 
ia_eaacp_enc_pre_twiddle_compute(FLOAT32 * ptr_in1,FLOAT32 * ptr_in2,FLOAT32 * ptr_x,const FLOAT32 * ptr_cos_sin,WORD n_by_4)285 static VOID ia_eaacp_enc_pre_twiddle_compute(FLOAT32 *ptr_in1, FLOAT32 *ptr_in2, FLOAT32 *ptr_x,
286                                              const FLOAT32 *ptr_cos_sin, WORD n_by_4) {
287   WORD32 i;
288   FLOAT32 temp_r, temp_i;
289   FLOAT32 temp_r1, temp_i1;
290   FLOAT32 *ptr_x1 = ptr_x + (SIZE_T)((n_by_4 << 2) - 1);
291   FLOAT32 c, c1, s, s1;
292 
293   for (i = 0; i < n_by_4; i++) {
294     c = *ptr_cos_sin++;
295     s = *ptr_cos_sin++;
296     s1 = *ptr_cos_sin++;
297     c1 = *ptr_cos_sin++;
298 
299     temp_r = *ptr_in1++;
300     temp_i1 = *ptr_in1++;
301     temp_i = *ptr_in2--;
302     temp_r1 = *ptr_in2--;
303     *ptr_x = ((temp_r * c) + (temp_i * s));
304     ptr_x++;
305 
306     *ptr_x = ((temp_i * c) - (temp_r * s));
307     ptr_x++;
308 
309     *ptr_x1 = ((temp_i1 * c1) - (temp_r1 * s1));
310     ptr_x1--;
311 
312     *ptr_x1 = ((temp_r1 * c1) + (temp_i1 * s1));
313     ptr_x1--;
314   }
315 }
316 
ia_enhaacplus_enc_post_twiddle(FLOAT32 * ptr_out,FLOAT32 * ptr_x,const FLOAT32 * ptr_cos_sin_tbl,WORD m)317 VOID ia_enhaacplus_enc_post_twiddle(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
318                                     const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
319   WORD i;
320   FLOAT32 c, c1, s, s1;
321   FLOAT32 tmp_var;
322   FLOAT32 tempr, tempr1, tempi, tempi1;
323   FLOAT32 *ptr_out1 = ptr_out + m - 1;
324   FLOAT32 *ptr_x1 = ptr_x + m - 1;
325 
326   for (i = 0; i < (m >> 2); i++) {
327     c = *ptr_cos_sin_tbl++;
328     s = *ptr_cos_sin_tbl++;
329     s1 = *ptr_cos_sin_tbl++;
330     c1 = *ptr_cos_sin_tbl++;
331     tempr = *ptr_x++;
332     tempi = *ptr_x++;
333     tempi1 = *ptr_x1--;
334     tempr1 = *ptr_x1--;
335 
336     tmp_var = ((tempr * c) + (tempi * s));
337     *ptr_out++ = tmp_var;
338 
339     tmp_var = ((tempr * s) - (tempi * c));
340     *ptr_out1-- = tmp_var;
341 
342     tmp_var = ((tempr1 * c1) + (tempi1 * s1));
343     *ptr_out1-- = tmp_var;
344 
345     tmp_var = ((tempr1 * s1) - (tempi1 * c1));
346     *ptr_out++ = tmp_var;
347   }
348 }
349 
ia_eaacp_enc_pre_twiddle_aac(FLOAT32 * ptr_x,FLOAT32 * ptr_data,WORD32 n,const FLOAT32 * ptr_cos_array)350 VOID ia_eaacp_enc_pre_twiddle_aac(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
351                                   const FLOAT32 *ptr_cos_array) {
352   WORD n_by_4;
353   FLOAT32 *ptr_in1, *ptr_in2;
354 
355   n_by_4 = n >> 2;
356 
357   ptr_in1 = ptr_data;
358   ptr_in2 = ptr_data + n - 1;
359 
360   ia_eaacp_enc_pre_twiddle_compute(ptr_in1, ptr_in2, ptr_x, ptr_cos_array, n_by_4);
361 }
362 
ia_enhaacplus_enc_calc_norm(WORD32 a)363 static PLATFORM_INLINE WORD8 ia_enhaacplus_enc_calc_norm(WORD32 a) {
364   WORD8 norm_val;
365 
366   if (a == 0) {
367     norm_val = 31;
368   } else {
369     if (a == (WORD32)0xffffffffL) {
370       norm_val = 31;
371     } else {
372       if (a < 0) {
373         a = ~a;
374       }
375       for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
376         a <<= 1;
377       }
378     }
379   }
380 
381   return norm_val;
382 }
383 
ia_enhaacplus_enc_complex_3point_fft(FLOAT32 * ptr_in,FLOAT32 * ptr_out)384 static PLATFORM_INLINE VOID ia_enhaacplus_enc_complex_3point_fft(FLOAT32 *ptr_in,
385                                                                  FLOAT32 *ptr_out) {
386   FLOAT32 add_r, sub_r;
387   FLOAT32 add_i, sub_i;
388   FLOAT32 x_01_r, x_01_i, temp;
389   FLOAT32 p1, p2, p3, p4;
390   FLOAT64 sin_mu = 0.866025403784439f;
391 
392   x_01_r = ptr_in[0] + ptr_in[2];
393   x_01_i = ptr_in[1] + ptr_in[3];
394 
395   add_r = ptr_in[2] + ptr_in[4];
396   add_i = ptr_in[3] + ptr_in[5];
397 
398   sub_r = ptr_in[2] - ptr_in[4];
399   sub_i = ptr_in[3] - ptr_in[5];
400 
401   p1 = add_r / (FLOAT32)2.0f;
402   p4 = add_i / (FLOAT32)2.0f;
403   p2 = (FLOAT32)((FLOAT64)sub_i * sin_mu);
404   p3 = (FLOAT32)((FLOAT64)sub_r * sin_mu);
405 
406   temp = ptr_in[0] - p1;
407 
408   ptr_out[0] = x_01_r + ptr_in[4];
409   ptr_out[1] = x_01_i + ptr_in[5];
410   ptr_out[2] = temp + p2;
411   ptr_out[3] = (ptr_in[1] - p3) - p4;
412   ptr_out[4] = temp - p2;
413   ptr_out[5] = (ptr_in[1] + p3) - p4;
414 }
415 
ia_enhaacplus_enc_complex_fft_p2(FLOAT32 * ptr_x,WORD32 nlength,FLOAT32 * ptr_scratch_fft_p2_y)416 VOID ia_enhaacplus_enc_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength,
417                                       FLOAT32 *ptr_scratch_fft_p2_y) {
418   WORD32 i, j, k, n_stages, h2;
419   FLOAT32 x0_r, x0_i, x1_r, x1_i, x2_r, x2_i, x3_r, x3_i;
420   WORD32 del, nodespacing, in_loop_cnt;
421   WORD32 not_power_4;
422   WORD32 dig_rev_shift;
423   FLOAT32 *ptr_p2_y = ptr_scratch_fft_p2_y;
424   WORD32 mpass = nlength;
425   WORD32 npoints = nlength;
426   FLOAT32 *ptr_y = ptr_p2_y;
427   const FLOAT64 *ptr_w;
428   FLOAT32 *ptr_inp;
429   FLOAT32 tmk;
430   const FLOAT64 *ptr_twiddles;
431   FLOAT32 *ptr_data;
432   FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
433   WORD32 sec_loop_cnt;
434   FLOAT32 tmp;
435 
436   memset(ptr_y, 0, nlength * 2 * sizeof(*ptr_y));
437 
438   dig_rev_shift = ia_enhaacplus_enc_calc_norm(mpass) + 1 - 16;
439   n_stages = 30 - ia_enhaacplus_enc_calc_norm(mpass);
440   not_power_4 = n_stages & 1;
441 
442   n_stages = n_stages >> 1;
443 
444   ptr_w = ia_enhaacplus_enc_twiddle_table_fft_32x32;
445 
446   dig_rev_shift = MAX(dig_rev_shift, 0);
447 
448   for (i = 0; i < npoints; i += 4) {
449     ptr_inp = ptr_x;
450     DIG_REV_NEW(i, dig_rev_shift, h2);
451     if (not_power_4) {
452       h2 += 1;
453       h2 &= ~1;
454     }
455     ptr_inp += (h2);
456 
457     x0_r = *ptr_inp;
458     x0_i = *(ptr_inp + 1);
459     ptr_inp += (npoints >> 1);
460 
461     x1_r = *ptr_inp;
462     x1_i = *(ptr_inp + 1);
463     ptr_inp += (npoints >> 1);
464 
465     x2_r = *ptr_inp;
466     x2_i = *(ptr_inp + 1);
467     ptr_inp += (npoints >> 1);
468 
469     x3_r = *ptr_inp;
470     x3_i = *(ptr_inp + 1);
471 
472     x0_r = x0_r + x2_r;
473     x0_i = x0_i + x2_i;
474 
475     tmk = x0_r - x2_r;
476     x2_r = tmk - x2_r;
477     tmk = x0_i - x2_i;
478     x2_i = tmk - x2_i;
479 
480     x1_r = x1_r + x3_r;
481     x1_i = x1_i + x3_i;
482 
483     tmk = x1_r - x3_r;
484     x3_r = tmk - x3_r;
485     tmk = x1_i - x3_i;
486     x3_i = tmk - x3_i;
487 
488     x0_r = x0_r + x1_r;
489     x0_i = x0_i + x1_i;
490 
491     tmk = x0_r - x1_r;
492     x1_r = tmk - x1_r;
493     tmk = x0_i - x1_i;
494     x1_i = tmk - x1_i;
495 
496     x2_r = x2_r + x3_i;
497     x2_i = x2_i - x3_r;
498 
499     tmk = x2_r - x3_i;
500     x3_i = tmk - x3_i;
501     tmk = x2_i + x3_r;
502     x3_r = tmk + x3_r;
503 
504     *ptr_y++ = x0_r;
505     *ptr_y++ = x0_i;
506     *ptr_y++ = x2_r;
507     *ptr_y++ = x2_i;
508     *ptr_y++ = x1_r;
509     *ptr_y++ = x1_i;
510     *ptr_y++ = x3_i;
511     *ptr_y++ = x3_r;
512   }
513   ptr_y -= 2 * npoints;
514   del = 4;
515   nodespacing = 64;
516   in_loop_cnt = npoints >> 4;
517   for (i = n_stages - 1; i > 0; i--) {
518     ptr_twiddles = ptr_w;
519     ptr_data = ptr_y;
520     for (k = in_loop_cnt; k != 0; k--) {
521       x0_r = (*ptr_data);
522       x0_i = (*(ptr_data + 1));
523       ptr_data += ((SIZE_T)del << 1);
524 
525       x1_r = (*ptr_data);
526       x1_i = (*(ptr_data + 1));
527       ptr_data += ((SIZE_T)del << 1);
528 
529       x2_r = (*ptr_data);
530       x2_i = (*(ptr_data + 1));
531       ptr_data += ((SIZE_T)del << 1);
532 
533       x3_r = (*ptr_data);
534       x3_i = (*(ptr_data + 1));
535       ptr_data -= 3 * (del << 1);
536 
537       x0_r = x0_r + x2_r;
538       x0_i = x0_i + x2_i;
539       x2_r = x0_r - (x2_r * 2);
540       x2_i = x0_i - (x2_i * 2);
541       x1_r = x1_r + x3_r;
542       x1_i = x1_i + x3_i;
543       x3_r = x1_r - (x3_r * 2);
544       x3_i = x1_i - (x3_i * 2);
545 
546       x0_r = x0_r + x1_r;
547       x0_i = x0_i + x1_i;
548       x1_r = x0_r - (x1_r * 2);
549       x1_i = x0_i - (x1_i * 2);
550       x2_r = x2_r + x3_i;
551       x2_i = x2_i - x3_r;
552       x3_i = x2_r - (x3_i * 2);
553       x3_r = x2_i + (x3_r * 2);
554 
555       *ptr_data = x0_r;
556       *(ptr_data + 1) = x0_i;
557       ptr_data += ((SIZE_T)del << 1);
558 
559       *ptr_data = x2_r;
560       *(ptr_data + 1) = x2_i;
561       ptr_data += ((SIZE_T)del << 1);
562 
563       *ptr_data = x1_r;
564       *(ptr_data + 1) = x1_i;
565       ptr_data += ((SIZE_T)del << 1);
566 
567       *ptr_data = x3_i;
568       *(ptr_data + 1) = x3_r;
569       ptr_data += ((SIZE_T)del << 1);
570     }
571     ptr_data = ptr_y + 2;
572 
573     sec_loop_cnt = (nodespacing * del);
574     sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
575                    (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
576                    (sec_loop_cnt / 256);
577 
578     for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
579       w_1 = *(ptr_twiddles + j);
580       w_4 = *(ptr_twiddles + j + 257);
581       w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
582       w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
583       w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1));
584       w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 257);
585 
586       for (k = in_loop_cnt; k != 0; k--) {
587         ptr_data += ((SIZE_T)del << 1);
588 
589         x1_r = *ptr_data;
590         x1_i = *(ptr_data + 1);
591         ptr_data += ((SIZE_T)del << 1);
592 
593         x2_r = *ptr_data;
594         x2_i = *(ptr_data + 1);
595         ptr_data += ((SIZE_T)del << 1);
596 
597         x3_r = *ptr_data;
598         x3_i = *(ptr_data + 1);
599         ptr_data -= 3 * (del << 1);
600 
601         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
602         x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
603         x1_r = tmp;
604 
605         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
606         x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
607         x2_r = tmp;
608 
609         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_3) - ixheaace_dmult((FLOAT64)x3_i, w_6));
610         x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
611         x3_r = tmp;
612 
613         x0_r = (*ptr_data);
614         x0_i = (*(ptr_data + 1));
615 
616         x0_r = x0_r + (x2_r);
617         x0_i = x0_i + (x2_i);
618         x2_r = x0_r - (x2_r * 2);
619         x2_i = x0_i - (x2_i * 2);
620         x1_r = x1_r + x3_r;
621         x1_i = x1_i + x3_i;
622         x3_r = x1_r - (x3_r * 2);
623         x3_i = x1_i - (x3_i * 2);
624 
625         x0_r = x0_r + (x1_r);
626         x0_i = x0_i + (x1_i);
627         x1_r = x0_r - (x1_r * 2);
628         x1_i = x0_i - (x1_i * 2);
629         x2_r = x2_r + (x3_i);
630         x2_i = x2_i - (x3_r);
631         x3_i = x2_r - (x3_i * 2);
632         x3_r = x2_i + (x3_r * 2);
633 
634         *ptr_data = x0_r;
635         *(ptr_data + 1) = x0_i;
636         ptr_data += ((SIZE_T)del << 1);
637 
638         *ptr_data = x2_r;
639         *(ptr_data + 1) = x2_i;
640         ptr_data += ((SIZE_T)del << 1);
641 
642         *ptr_data = x1_r;
643         *(ptr_data + 1) = x1_i;
644         ptr_data += ((SIZE_T)del << 1);
645 
646         *ptr_data = x3_i;
647         *(ptr_data + 1) = x3_r;
648         ptr_data += ((SIZE_T)del << 1);
649       }
650       ptr_data -= 2 * npoints;
651       ptr_data += 2;
652     }
653     for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
654       w_1 = *(ptr_twiddles + j);
655       w_4 = *(ptr_twiddles + j + 257);
656       w_2 = *(ptr_twiddles + ((SIZE_T)j << 1));
657       w_5 = *(ptr_twiddles + ((SIZE_T)j << 1) + 257);
658       w_3 = *(ptr_twiddles + j + ((SIZE_T)j << 1) - 256);
659       w_6 = *(ptr_twiddles + j + ((SIZE_T)j << 1) + 1);
660 
661       for (k = in_loop_cnt; k != 0; k--) {
662         ptr_data += ((SIZE_T)del << 1);
663 
664         x1_r = *ptr_data;
665         x1_i = *(ptr_data + 1);
666         ptr_data += ((SIZE_T)del << 1);
667 
668         x2_r = *ptr_data;
669         x2_i = *(ptr_data + 1);
670         ptr_data += ((SIZE_T)del << 1);
671 
672         x3_r = *ptr_data;
673         x3_i = *(ptr_data + 1);
674         ptr_data -= 3 * (del << 1);
675 
676         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
677         x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
678         x1_r = tmp;
679 
680         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_2) - ixheaace_dmult((FLOAT64)x2_i, w_5));
681         x2_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2_r, w_5), (FLOAT64)x2_i, w_2);
682         x2_r = tmp;
683 
684         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
685         x3_i =
686             (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
687         x3_r = tmp;
688 
689         x0_r = (*ptr_data);
690         x0_i = (*(ptr_data + 1));
691 
692         x0_r = x0_r + (x2_r);
693         x0_i = x0_i + (x2_i);
694         x2_r = x0_r - (x2_r * 2);
695         x2_i = x0_i - (x2_i * 2);
696         x1_r = x1_r + x3_r;
697         x1_i = x1_i + x3_i;
698         x3_r = x1_r - (x3_r * 2);
699         x3_i = x1_i - (x3_i * 2);
700 
701         x0_r = x0_r + (x1_r);
702         x0_i = x0_i + (x1_i);
703         x1_r = x0_r - (x1_r * 2);
704         x1_i = x0_i - (x1_i * 2);
705         x2_r = x2_r + (x3_i);
706         x2_i = x2_i - (x3_r);
707         x3_i = x2_r - (x3_i * 2);
708         x3_r = x2_i + (x3_r * 2);
709 
710         *ptr_data = x0_r;
711         *(ptr_data + 1) = x0_i;
712         ptr_data += ((SIZE_T)del << 1);
713 
714         *ptr_data = x2_r;
715         *(ptr_data + 1) = x2_i;
716         ptr_data += ((SIZE_T)del << 1);
717 
718         *ptr_data = x1_r;
719         *(ptr_data + 1) = x1_i;
720         ptr_data += ((SIZE_T)del << 1);
721 
722         *ptr_data = x3_i;
723         *(ptr_data + 1) = x3_r;
724         ptr_data += ((SIZE_T)del << 1);
725       }
726       ptr_data -= 2 * npoints;
727       ptr_data += 2;
728     }
729     for (; j <= sec_loop_cnt * 2; j += nodespacing) {
730       w_1 = *(ptr_twiddles + j);
731       w_4 = *(ptr_twiddles + j + 257);
732       w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
733       w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
734       w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 256));
735       w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) + 1));
736 
737       for (k = in_loop_cnt; k != 0; k--) {
738         ptr_data += ((SIZE_T)del << 1);
739 
740         x1_r = *ptr_data;
741         x1_i = *(ptr_data + 1);
742         ptr_data += ((SIZE_T)del << 1);
743 
744         x2_r = *ptr_data;
745         x2_i = *(ptr_data + 1);
746         ptr_data += ((SIZE_T)del << 1);
747 
748         x3_r = *ptr_data;
749         x3_i = *(ptr_data + 1);
750         ptr_data -= 3 * (del << 1);
751 
752         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
753         x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1_r, w_4), x1_i, w_1);
754         x1_r = tmp;
755 
756         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
757         x2_i = (FLOAT32)(-ixheaace_dmult(x2_r, w_2) + ixheaace_dmult(x2_i, w_5));
758         x2_r = tmp;
759 
760         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3_r, w_6) + ixheaace_dmult((FLOAT64)x3_i, w_3));
761         x3_i =
762             (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
763         x3_r = tmp;
764 
765         x0_r = (*ptr_data);
766         x0_i = (*(ptr_data + 1));
767 
768         x0_r = x0_r + (x2_r);
769         x0_i = x0_i + (x2_i);
770         x2_r = x0_r - (x2_r * 2);
771         x2_i = x0_i - (x2_i * 2);
772         x1_r = x1_r + x3_r;
773         x1_i = x1_i + x3_i;
774         x3_r = x1_r - (x3_r * 2);
775         x3_i = x1_i - (x3_i * 2);
776 
777         x0_r = x0_r + (x1_r);
778         x0_i = x0_i + (x1_i);
779         x1_r = x0_r - (x1_r * 2);
780         x1_i = x0_i - (x1_i * 2);
781         x2_r = x2_r + (x3_i);
782         x2_i = x2_i - (x3_r);
783         x3_i = x2_r - (x3_i * 2);
784         x3_r = x2_i + (x3_r * 2);
785 
786         *ptr_data = x0_r;
787         *(ptr_data + 1) = x0_i;
788         ptr_data += ((SIZE_T)del << 1);
789 
790         *ptr_data = x2_r;
791         *(ptr_data + 1) = x2_i;
792         ptr_data += ((SIZE_T)del << 1);
793 
794         *ptr_data = x1_r;
795         *(ptr_data + 1) = x1_i;
796         ptr_data += ((SIZE_T)del << 1);
797 
798         *ptr_data = x3_i;
799         *(ptr_data + 1) = x3_r;
800         ptr_data += ((SIZE_T)del << 1);
801       }
802       ptr_data -= 2 * npoints;
803       ptr_data += 2;
804     }
805     for (; j < nodespacing * del; j += nodespacing) {
806       w_1 = *(ptr_twiddles + j);
807       w_4 = *(ptr_twiddles + j + 257);
808       w_2 = *(ptr_twiddles + (SIZE_T)((j << 1) - 256));
809       w_5 = *(ptr_twiddles + (SIZE_T)((j << 1) + 1));
810       w_3 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512));
811       w_6 = *(ptr_twiddles + (SIZE_T)(j + (j << 1) - 512 + 257));
812 
813       for (k = in_loop_cnt; k != 0; k--) {
814         ptr_data += ((SIZE_T)del << 1);
815 
816         x1_r = *ptr_data;
817         x1_i = *(ptr_data + 1);
818         ptr_data += ((SIZE_T)del << 1);
819 
820         x2_r = *ptr_data;
821         x2_i = *(ptr_data + 1);
822         ptr_data += ((SIZE_T)del << 1);
823 
824         x3_r = *ptr_data;
825         x3_i = *(ptr_data + 1);
826         ptr_data -= 3 * (del << 1);
827 
828         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
829         x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
830         x1_r = tmp;
831 
832         tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2_r, w_5) + ixheaace_dmult((FLOAT64)x2_i, w_2));
833         x2_i =
834             (FLOAT32)(-ixheaace_dmult((FLOAT64)x2_r, w_2) + ixheaace_dmult((FLOAT64)x2_i, w_5));
835         x2_r = tmp;
836 
837         tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3_r, w_3) + ixheaace_dmult((FLOAT64)x3_i, w_6));
838         x3_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3_r, w_6), (FLOAT64)x3_i, w_3);
839         x3_r = tmp;
840 
841         x0_r = (*ptr_data);
842         x0_i = (*(ptr_data + 1));
843 
844         x0_r = x0_r + (x2_r);
845         x0_i = x0_i + (x2_i);
846         x2_r = x0_r - (x2_r * 2);
847         x2_i = x0_i - (x2_i * 2);
848         x1_r = x1_r + x3_r;
849         x1_i = x1_i - x3_i;
850         x3_r = x1_r - (x3_r * 2);
851         x3_i = x1_i + (x3_i * 2);
852 
853         x0_r = x0_r + (x1_r);
854         x0_i = x0_i + (x1_i);
855         x1_r = x0_r - (x1_r * 2);
856         x1_i = x0_i - (x1_i * 2);
857         x2_r = x2_r + (x3_i);
858         x2_i = x2_i - (x3_r);
859         x3_i = x2_r - (x3_i * 2);
860         x3_r = x2_i + (x3_r * 2);
861 
862         *ptr_data = x0_r;
863         *(ptr_data + 1) = x0_i;
864         ptr_data += ((SIZE_T)del << 1);
865 
866         *ptr_data = x2_r;
867         *(ptr_data + 1) = x2_i;
868         ptr_data += ((SIZE_T)del << 1);
869 
870         *ptr_data = x1_r;
871         *(ptr_data + 1) = x1_i;
872         ptr_data += ((SIZE_T)del << 1);
873 
874         *ptr_data = x3_i;
875         *(ptr_data + 1) = x3_r;
876         ptr_data += ((SIZE_T)del << 1);
877       }
878       ptr_data -= 2 * npoints;
879       ptr_data += 2;
880     }
881     nodespacing >>= 2;
882     del <<= 2;
883     in_loop_cnt >>= 2;
884   }
885   if (not_power_4) {
886     ptr_twiddles = ptr_w;
887     nodespacing <<= 1;
888 
889     for (j = del / 2; j != 0; j--) {
890       w_1 = *ptr_twiddles;
891       w_4 = *(ptr_twiddles + 257);
892       ptr_twiddles += nodespacing;
893 
894       x0_r = *ptr_y;
895       x0_i = *(ptr_y + 1);
896       ptr_y += ((SIZE_T)del << 1);
897 
898       x1_r = *ptr_y;
899       x1_i = *(ptr_y + 1);
900 
901       tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_1) - ixheaace_dmult((FLOAT64)x1_i, w_4));
902       x1_i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1_r, w_4), (FLOAT64)x1_i, w_1);
903       x1_r = tmp;
904 
905       *ptr_y = (x0_r) - (x1_r);
906       *(ptr_y + 1) = (x0_i) - (x1_i);
907       ptr_y -= ((SIZE_T)del << 1);
908 
909       *ptr_y = (x0_r) + (x1_r);
910       *(ptr_y + 1) = (x0_i) + (x1_i);
911       ptr_y += 2;
912     }
913     ptr_twiddles = ptr_w;
914     for (j = del / 2; j != 0; j--) {
915       w_1 = *ptr_twiddles;
916       w_4 = *(ptr_twiddles + 257);
917       ptr_twiddles += nodespacing;
918 
919       x0_r = *ptr_y;
920       x0_i = *(ptr_y + 1);
921       ptr_y += ((SIZE_T)del << 1);
922 
923       x1_r = *ptr_y;
924       x1_i = *(ptr_y + 1);
925 
926       tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1_r, w_4) +
927                       ixheaace_dmult((FLOAT64)x1_i, w_1)) /*/2*/;
928       x1_i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1_r, w_1) +
929                        ixheaace_dmult((FLOAT64)x1_i, w_4)) /*/2*/;
930       x1_r = tmp;
931 
932       *ptr_y = (x0_r) - (x1_r);
933       *(ptr_y + 1) = (x0_i) - (x1_i);
934       ptr_y -= ((SIZE_T)del << 1);
935 
936       *ptr_y = (x0_r) + (x1_r);
937       *(ptr_y + 1) = (x0_i) + (x1_i);
938       ptr_y += 2;
939     }
940   }
941 
942   for (i = 0; i < nlength; i++) {
943     *(ptr_x + 2 * i) = ptr_p2_y[2 * i];
944     *(ptr_x + 2 * i + 1) = ptr_p2_y[2 * i + 1];
945   }
946 }
947 
ia_enhaacplus_enc_complex_fft_p3(FLOAT32 * ptr_data,WORD32 nlength,ixheaace_scratch_mem * pstr_scratch)948 static VOID ia_enhaacplus_enc_complex_fft_p3(FLOAT32 *ptr_data, WORD32 nlength,
949                                              ixheaace_scratch_mem *pstr_scratch) {
950   WORD32 i, j;
951   FLOAT32 *ptr_data_3 = pstr_scratch->p_fft_p3_data_3;
952   FLOAT32 *ptr_p3_y = pstr_scratch->p_fft_p3_y;
953   WORD32 cnfac;
954   WORD32 mpass = nlength;
955   FLOAT32 *ptr_x = ptr_data;
956   FLOAT32 *ptr_y = ptr_p3_y;
957   cnfac = 0;
958   const FLOAT64 *ptr_w1_r, *ptr_w1_i;
959   FLOAT32 tmp;
960   ptr_w1_r = ia_enhaacplus_enc_twiddle_table_3pr;
961   ptr_w1_i = ia_enhaacplus_enc_twiddle_table_3pi;
962 
963   while (mpass % 3 == 0) {
964     mpass /= 3;
965     cnfac++;
966   }
967 
968   for (i = 0; i < 3 * cnfac; i++) {
969     for (j = 0; j < mpass; j++) {
970       ptr_data_3[2 * j] = ptr_data[3 * (2 * j) + (2 * i)];
971       ptr_data_3[2 * j + 1] = ptr_data[3 * (2 * j) + 1 + (2 * i)];
972     }
973     ia_enhaacplus_enc_complex_fft_p2(ptr_data_3, mpass, pstr_scratch->p_fft_p2_y);
974 
975     for (j = 0; j < mpass; j++) {
976       ptr_data[3 * (2 * j) + (2 * i)] = ptr_data_3[2 * j];
977       ptr_data[3 * (2 * j) + 1 + (2 * i)] = ptr_data_3[2 * j + 1];
978     }
979   }
980 
981   {
982     for (i = 0; i < nlength; i += 3) {
983       tmp = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_r) -
984                       (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_i));
985       ptr_data[2 * i + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * i] * (*ptr_w1_i) +
986                                       (FLOAT64)ptr_data[2 * i + 1] * (*ptr_w1_r));
987       ptr_data[2 * i] = tmp;
988 
989       ptr_w1_r++;
990       ptr_w1_i++;
991 
992       tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_r) -
993                       (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_i));
994       ptr_data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 1)] * (*ptr_w1_i) +
995                                             (FLOAT64)ptr_data[2 * (i + 1) + 1] * (*ptr_w1_r));
996       ptr_data[2 * (i + 1)] = tmp;
997 
998       ptr_w1_r++;
999       ptr_w1_i++;
1000 
1001       tmp = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_r) -
1002                       (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_i));
1003       ptr_data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)ptr_data[2 * (i + 2)] * (*ptr_w1_i) +
1004                                             (FLOAT64)ptr_data[2 * (i + 2) + 1] * (*ptr_w1_r));
1005       ptr_data[2 * (i + 2)] = tmp;
1006 
1007       ptr_w1_r += 3 * (128 / mpass - 1) + 1;
1008       ptr_w1_i += 3 * (128 / mpass - 1) + 1;
1009     }
1010   }
1011 
1012   for (i = 0; i < mpass; i++) {
1013     ia_enhaacplus_enc_complex_3point_fft(ptr_x, ptr_y);
1014 
1015     ptr_x = ptr_x + 6;
1016     ptr_y = ptr_y + 6;
1017   }
1018 
1019   for (i = 0; i < mpass; i++) {
1020     ptr_data[2 * i] = ptr_p3_y[6 * i];
1021     ptr_data[2 * i + 1] = ptr_p3_y[6 * i + 1];
1022   }
1023 
1024   for (i = 0; i < mpass; i++) {
1025     ptr_data[2 * (i + mpass)] = ptr_p3_y[6 * i + 2];
1026     ptr_data[2 * (i + mpass) + 1] = ptr_p3_y[6 * i + 3];
1027   }
1028 
1029   for (i = 0; i < mpass; i++) {
1030     ptr_data[2 * (i + 2 * mpass)] = ptr_p3_y[6 * i + 4];
1031     ptr_data[2 * (i + 2 * mpass) + 1] = ptr_p3_y[6 * i + 5];
1032   }
1033 }
1034 
ia_enhaacplus_enc_complex_fft(FLOAT32 * ptr_data,WORD32 len,ixheaace_scratch_mem * pstr_scratch)1035 VOID ia_enhaacplus_enc_complex_fft(FLOAT32 *ptr_data, WORD32 len,
1036                                    ixheaace_scratch_mem *pstr_scratch) {
1037   if (len & (len - 1)) {
1038     ia_enhaacplus_enc_complex_fft_p3(ptr_data, len, pstr_scratch);
1039   } else {
1040     ia_enhaacplus_enc_complex_fft_p2(ptr_data, len, pstr_scratch->p_fft_p2_y);
1041   }
1042 }
1043 
ixheaace_post_mdct(FLOAT32 * ptr_x,WORD32 m,const FLOAT32 * ptr_trig_data,WORD32 step,WORD32 trig_data_size)1044 static VOID ixheaace_post_mdct(FLOAT32 *ptr_x, WORD32 m, const FLOAT32 *ptr_trig_data,
1045                                WORD32 step, WORD32 trig_data_size) {
1046   WORD32 i;
1047   FLOAT32 w_re, w_im, re1, re2, im1, im2;
1048   const FLOAT32 *ptr_sin = ptr_trig_data;
1049   const FLOAT32 *ptr_cos = ptr_trig_data + trig_data_size;
1050 
1051   w_im = *ptr_sin;
1052   w_re = *ptr_cos;
1053 
1054   for (i = 0; i < m / 4; i++) {
1055     re1 = ptr_x[2 * i];
1056     im1 = ptr_x[2 * i + 1];
1057     re2 = ptr_x[m - 2 - 2 * i];
1058     im2 = ptr_x[m - 1 - 2 * i];
1059 
1060     ptr_x[2 * i] = (re1 * w_re + im1 * w_im);
1061 
1062     ptr_x[m - 1 - 2 * i] = (re1 * w_im - im1 * w_re);
1063 
1064     ptr_sin += step;
1065     ptr_cos -= step;
1066 
1067     w_im = *ptr_sin;
1068     w_re = *ptr_cos;
1069 
1070     ptr_x[m - 2 - 2 * i] = (re2 * w_im + im2 * w_re);
1071 
1072     ptr_x[2 * i + 1] = (re2 * w_re - im2 * w_im);
1073   }
1074 }
1075 
ixheaace_cplx_mult_twid(FLOAT32 * ptr_re,FLOAT32 * ptr_im,FLOAT32 a,FLOAT32 b,FLOAT32 twid_table,FLOAT32 twid_table_h)1076 static VOID ixheaace_cplx_mult_twid(FLOAT32 *ptr_re, FLOAT32 *ptr_im, FLOAT32 a, FLOAT32 b,
1077                                     FLOAT32 twid_table, FLOAT32 twid_table_h) {
1078   *ptr_re = (a * twid_table) - (b * twid_table_h);
1079   *ptr_im = (a * twid_table_h) + (b * twid_table);
1080 }
1081 
ixheaace_cfft_15_twiddle(FLOAT32 * ptr_inp)1082 static VOID ixheaace_cfft_15_twiddle(FLOAT32 *ptr_inp) {
1083   const FLOAT32 *ptr_tw_flt = &ixheaace_mix_rad_twid_tbl[0];
1084   const FLOAT32 *ptr_tw_flt_h = &ixheaace_mix_rad_twid_tbl_h[0];
1085   FLOAT32 accu1, accu2;
1086   WORD32 i, j;
1087   ptr_inp += 12;
1088 
1089   for (j = 0; j < 2; j++) {
1090     for (i = 0; i < 4; i++) {
1091       ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1092                               ptr_tw_flt[i], ptr_tw_flt_h[i]);
1093       ptr_inp[2 * i + 0] = accu1;
1094       ptr_inp[2 * i + 1] = accu2;
1095     }
1096     ptr_inp += 10;
1097     ptr_tw_flt += 4;
1098     ptr_tw_flt_h += 4;
1099   }
1100 }
1101 
ixheaace_cfft_15_480(FLOAT32 * ptr_inp,FLOAT32 * ptr_op,FLOAT32 * ptr_fft3_out)1102 static VOID ixheaace_cfft_15_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, FLOAT32 *ptr_fft3_out) {
1103   WORD32 i, idx;
1104   FLOAT32 *ptr_buf1, *ptr_buf2, *ptr_buf3;
1105   FLOAT32 add_r, sub_r;
1106   FLOAT32 add_i, sub_i;
1107   FLOAT32 x_01_r, x_01_i, temp;
1108   FLOAT32 p1, p2, p3, p4;
1109 
1110   FLOAT32 sin_mu_flt = 0.866027832f;
1111   FLOAT32 c51_flt = 0.951049805f;
1112   FLOAT32 c52_flt = -0.76940918f;
1113   FLOAT32 c53_flt = -0.36328125f;
1114   FLOAT32 c54_flt = 0.559020996f;
1115   FLOAT32 c55_flt = -0.625f;
1116 
1117   FLOAT32 r1, r2, r3, r4;
1118   FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1119   FLOAT32 *ptr_out_fft3 = ptr_fft3_out;
1120 
1121   FLOAT32 xr_0, xr_1, xr_2;
1122   FLOAT32 xi_0, xi_1, xi_2;
1123 
1124   ptr_buf2 = ptr_fft3_out;
1125   ptr_buf1 = ptr_buf3 = ptr_fft3_out;
1126 
1127   for (i = 0; i < FFT3; i++) {
1128     *ptr_buf1++ = ptr_inp[0 + 64 * i];
1129     *ptr_buf1++ = ptr_inp[1 + 64 * i];
1130 
1131     *ptr_buf1++ = ptr_inp[192 + 64 * i];
1132     *ptr_buf1++ = ptr_inp[193 + 64 * i];
1133 
1134     *ptr_buf1++ = ptr_inp[384 + 64 * i];
1135     *ptr_buf1++ = ptr_inp[385 + 64 * i];
1136 
1137     *ptr_buf1++ = ptr_inp[576 + 64 * i];
1138     *ptr_buf1++ = ptr_inp[577 + 64 * i];
1139 
1140     *ptr_buf1++ = ptr_inp[768 + 64 * i];
1141     *ptr_buf1++ = ptr_inp[769 + 64 * i];
1142 
1143     r1 = ptr_buf3[2] + ptr_buf3[8];
1144     r4 = ptr_buf3[2] - ptr_buf3[8];
1145     r3 = ptr_buf3[4] + ptr_buf3[6];
1146     r2 = ptr_buf3[4] - ptr_buf3[6];
1147     t = ((r1 - r3) * c54_flt);
1148 
1149     r1 = r1 + r3;
1150 
1151     temp1 = ptr_buf3[0] + r1;
1152 
1153     r1 = temp1 + ((r1 * c55_flt) * 2);
1154 
1155     r3 = r1 - t;
1156     r1 = r1 + t;
1157 
1158     t = ((r4 + r2) * c51_flt);
1159     r4 = t + ((r4 * c52_flt) * 2);
1160     r2 = t + (r2 * c53_flt);
1161 
1162     s1 = ptr_buf3[3] + ptr_buf3[9];
1163     s4 = ptr_buf3[3] - ptr_buf3[9];
1164     s3 = ptr_buf3[5] + ptr_buf3[7];
1165     s2 = ptr_buf3[5] - ptr_buf3[7];
1166 
1167     t = ((s1 - s3) * c54_flt);
1168 
1169     s1 = s1 + s3;
1170 
1171     temp2 = ptr_buf3[1] + s1;
1172 
1173     s1 = temp2 + (((s1 * c55_flt)) * 2);
1174 
1175     s3 = s1 - t;
1176     s1 = s1 + t;
1177 
1178     t = ((s4 + s2) * c51_flt);
1179     s4 = t + (((s4 * c52_flt)) * 2);
1180     s2 = t + ((s2 * c53_flt));
1181 
1182     *ptr_buf2++ = temp1;
1183     *ptr_buf2++ = temp2;
1184     *ptr_buf2++ = r1 + s2;
1185     *ptr_buf2++ = s1 - r2;
1186     *ptr_buf2++ = r3 - s4;
1187     *ptr_buf2++ = s3 + r4;
1188     *ptr_buf2++ = r3 + s4;
1189     *ptr_buf2++ = s3 - r4;
1190     *ptr_buf2++ = r1 - s2;
1191     *ptr_buf2++ = s1 + r2;
1192     ptr_buf3 = ptr_buf1;
1193   }
1194 
1195   idx = 0;
1196   ixheaace_cfft_15_twiddle(ptr_out_fft3);
1197 
1198   for (i = 0; i < FFT5; i++) {
1199     xr_0 = ptr_out_fft3[0];
1200     xi_0 = ptr_out_fft3[1];
1201 
1202     xr_1 = ptr_out_fft3[10];
1203     xi_1 = ptr_out_fft3[11];
1204 
1205     xr_2 = ptr_out_fft3[20];
1206     xi_2 = ptr_out_fft3[21];
1207 
1208     x_01_r = (xr_0 + xr_1);
1209     x_01_i = (xi_0 + xi_1);
1210 
1211     add_r = (xr_1 + xr_2);
1212     add_i = (xi_1 + xi_2);
1213 
1214     sub_r = (xr_1 - xr_2);
1215     sub_i = (xi_1 - xi_2);
1216 
1217     p1 = add_r / 2;
1218 
1219     p2 = (sub_i * sin_mu_flt);
1220     p3 = (sub_r * sin_mu_flt);
1221 
1222     p4 = add_i / 2;
1223 
1224     temp = (xr_0 - p1);
1225     temp1 = (xi_0 + p3);
1226     temp2 = (xi_0 - p3);
1227 
1228     ptr_op[idx] = (x_01_r + xr_2);
1229     ptr_op[idx + 1] = (x_01_i + xi_2);
1230 
1231     idx = idx + 320;
1232     ptr_op[idx] = (temp + p2);
1233     ptr_op[idx + 1] = (temp2 - p4);
1234 
1235     idx = idx + 320;
1236     ptr_op[idx] = (temp - p2);
1237     ptr_op[idx + 1] = (temp1 - p4);
1238     ptr_out_fft3 += 2;
1239     idx = idx - 576;
1240   }
1241 }
1242 
ixheaace_cfft_twiddle_mult(FLOAT32 * ptr_inp,FLOAT32 * ptr_op,WORD32 dim1,WORD32 dim2,const FLOAT32 * ptr_tw_flt,const FLOAT32 * ptr_tw_h_flt)1243 static VOID ixheaace_cfft_twiddle_mult(FLOAT32 *ptr_inp, FLOAT32 *ptr_op, WORD32 dim1,
1244                                        WORD32 dim2, const FLOAT32 *ptr_tw_flt,
1245                                        const FLOAT32 *ptr_tw_h_flt) {
1246   FLOAT32 accu1, accu2;
1247   WORD32 i, j;
1248   WORD32 step_val = (dim2 - 1) << 1;
1249   for (i = 0; i < dim2; i++) {
1250     ptr_op[0] = ptr_inp[0];
1251     ptr_op[1] = ptr_inp[1];
1252     ptr_op += 2;
1253     ptr_inp += 2;
1254   }
1255 
1256   for (j = 0; j < (dim1 - 1); j++) {
1257     ptr_op[0] = ptr_inp[0];
1258     ptr_op[1] = ptr_inp[1];
1259     ptr_inp += 2;
1260     ptr_op += 2;
1261     for (i = 0; i < (dim2 - 1); i++) {
1262       ixheaace_cplx_mult_twid(&accu1, &accu2, ptr_inp[2 * i + 0], ptr_inp[2 * i + 1],
1263                               ptr_tw_flt[i], ptr_tw_h_flt[i]);
1264       ptr_op[2 * i + 0] = accu1;
1265       ptr_op[2 * i + 1] = accu2;
1266     }
1267     ptr_inp += step_val;
1268     ptr_op += step_val;
1269     ptr_tw_flt += (dim2 - 1);
1270     ptr_tw_h_flt += (dim2 - 1);
1271   }
1272 }
1273 
ixheaace_cfft_32_480(FLOAT32 * ptr_in,FLOAT32 * ptr_out)1274 static VOID ixheaace_cfft_32_480(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1275   WORD32 i, l1, l2, h2;
1276   FLOAT32 xh0_0, xh1_0, xl0_0, xl1_0;
1277   FLOAT32 xh0_1, xh1_1, xl0_1, xl1_1;
1278   FLOAT32 x_0, x_1, x_2, x_3;
1279   FLOAT32 x_4, x_5, x_6, x_7;
1280   FLOAT32 *ptr_x;
1281   FLOAT32 *ptr_y;
1282   FLOAT32 interm_y[FFT32X2];
1283   FLOAT32 n00, n10, n20, n30, n01, n11, n21, n31;
1284 
1285   FLOAT32 inp_0qi, inp_0qr;
1286   FLOAT32 inp_1qi, inp_1qr;
1287   FLOAT32 inp_2qi, inp_2qr;
1288   FLOAT32 inp_3qi, inp_3qr;
1289   FLOAT32 mul_0qi, mul_0qr;
1290   FLOAT32 mul_1qi, mul_1qr;
1291   FLOAT32 mul_2qi, mul_2qr;
1292   FLOAT32 mul_3qi, mul_3qr;
1293   FLOAT32 sum_0qi, sum_0qr;
1294   FLOAT32 sum_1qi, sum_1qr;
1295   FLOAT32 sum_2qi, sum_2qr;
1296   FLOAT32 sum_3qi, sum_3qr;
1297   WORD32 idx1 = 0, idx2 = FFT15 * FFT32;
1298   FLOAT32 mul_i, mul_r;
1299 
1300   ptr_x = ptr_in;
1301 
1302   // This computes first and second stage butterflies. So, 4-point FFT is done.
1303   for (i = 0; i < 8; i++) {
1304     x_0 = ptr_x[0];
1305     x_1 = ptr_x[1];
1306     x_2 = ptr_x[16];
1307     x_3 = ptr_x[16 + 1];
1308     x_4 = ptr_x[32];
1309     x_5 = ptr_x[32 + 1];
1310     x_6 = ptr_x[48];
1311     x_7 = ptr_x[48 + 1];
1312 
1313     xh0_0 = x_0 + x_4;
1314     xh1_0 = x_1 + x_5;
1315     xl0_0 = x_0 - x_4;
1316     xl1_0 = x_1 - x_5;
1317     xh0_1 = x_2 + x_6;
1318     xh1_1 = x_3 + x_7;
1319     xl0_1 = x_2 - x_6;
1320     xl1_1 = x_3 - x_7;
1321 
1322     n00 = xh0_0 + xh0_1;
1323     n01 = xh1_0 + xh1_1;
1324     n10 = xl0_0 + xl1_1;
1325     n11 = xl1_0 - xl0_1;
1326     n20 = xh0_0 - xh0_1;
1327     n21 = xh1_0 - xh1_1;
1328     n30 = xl0_0 - xl1_1;
1329     n31 = xl1_0 + xl0_1;
1330 
1331     ptr_x[0] = n00;
1332     ptr_x[1] = n01;
1333     ptr_x[16] = n10;
1334     ptr_x[16 + 1] = n11;
1335     ptr_x[32] = n20;
1336     ptr_x[32 + 1] = n21;
1337     ptr_x[48] = n30;
1338     ptr_x[48 + 1] = n31;
1339 
1340     ptr_x += 2;
1341   }
1342 
1343   // This computes third and fourth stage butterflies. So, next 4-point FFT is done.
1344   {
1345     h2 = 16 >> 1;
1346     l1 = 16;
1347     l2 = 16 + (16 >> 1);
1348 
1349     ptr_x = ptr_in;
1350     ptr_y = &interm_y[0];
1351 
1352     /* Butter fly summation in 2 steps */
1353     inp_0qr = ptr_x[0];
1354     inp_0qi = ptr_x[1];
1355     inp_1qr = ptr_x[4];
1356     inp_1qi = ptr_x[5];
1357     inp_2qr = ptr_x[8];
1358     inp_2qi = ptr_x[9];
1359     inp_3qr = ptr_x[12];
1360     inp_3qi = ptr_x[13];
1361 
1362     mul_0qr = inp_0qr;
1363     mul_0qi = inp_0qi;
1364     mul_1qr = inp_1qr;
1365     mul_1qi = inp_1qi;
1366     mul_2qr = inp_2qr;
1367     mul_2qi = inp_2qi;
1368     mul_3qr = inp_3qr;
1369     mul_3qi = inp_3qi;
1370 
1371     sum_0qr = mul_0qr + mul_2qr;
1372     sum_0qi = mul_0qi + mul_2qi;
1373     sum_1qr = mul_0qr - mul_2qr;
1374     sum_1qi = mul_0qi - mul_2qi;
1375     sum_2qr = mul_1qr + mul_3qr;
1376     sum_2qi = mul_1qi + mul_3qi;
1377     sum_3qr = mul_1qr - mul_3qr;
1378     sum_3qi = mul_1qi - mul_3qi;
1379 
1380     ptr_y[0] = sum_0qr + sum_2qr;
1381     ptr_y[1] = sum_0qi + sum_2qi;
1382     ptr_y[h2] = sum_1qr + sum_3qi;
1383     ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1384     ptr_y[l1] = sum_0qr - sum_2qr;
1385     ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1386     ptr_y[l2] = sum_1qr - sum_3qi;
1387     ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1388 
1389     ptr_y += 2;
1390     ptr_x += 16;
1391 
1392     /* 2nd butter fly */
1393 
1394     inp_0qr = ptr_x[0];
1395     inp_0qi = ptr_x[1];
1396     inp_1qr = ptr_x[4];
1397     inp_1qi = ptr_x[5];
1398     inp_2qr = ptr_x[8];
1399     inp_2qi = ptr_x[9];
1400     inp_3qr = ptr_x[12];
1401     inp_3qi = ptr_x[13];
1402 
1403     mul_0qr = inp_0qr;
1404     mul_0qi = inp_0qi;
1405 
1406     mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1407     mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1408 
1409     mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1410     mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1411 
1412     mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1413     mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1414 
1415     sum_0qr = mul_0qr + (mul_2qr * 2);
1416     sum_0qi = mul_0qi + (mul_2qi * 2);
1417     sum_1qr = mul_0qr - (mul_2qr * 2);
1418     sum_1qi = mul_0qi - (mul_2qi * 2);
1419 
1420     sum_2qr = mul_1qr + mul_3qr;
1421     sum_2qi = mul_1qi + mul_3qi;
1422     sum_3qr = mul_1qr - mul_3qr;
1423     sum_3qi = mul_1qi - mul_3qi;
1424 
1425     ptr_y[0] = sum_0qr + (sum_2qr * 2);
1426     ptr_y[1] = sum_0qi + (sum_2qi * 2);
1427     ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1428     ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1429     ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1430     ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1431     ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1432     ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1433 
1434     ptr_y += 2;
1435     ptr_x += 16;
1436 
1437     /* 3rd butter fly */
1438 
1439     inp_0qr = ptr_x[0];
1440     inp_0qi = ptr_x[1];
1441     inp_1qr = ptr_x[4];
1442     inp_1qi = ptr_x[5];
1443     inp_2qr = ptr_x[8];
1444     inp_2qi = ptr_x[9];
1445     inp_3qr = ptr_x[12];
1446     inp_3qi = ptr_x[13];
1447 
1448     mul_0qr = inp_0qr;
1449     mul_0qi = inp_0qi;
1450 
1451     mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1452     mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1453 
1454     mul_2qr = inp_2qi;
1455     mul_2qi = inp_2qr;
1456 
1457     mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1458     mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1459 
1460     sum_0qr = mul_0qr + mul_2qr;
1461     sum_0qi = mul_0qi - mul_2qi;
1462     sum_1qr = mul_0qr - mul_2qr;
1463     sum_1qi = mul_0qi + mul_2qi;
1464     sum_2qr = mul_1qr + mul_3qr;
1465     sum_2qi = mul_1qi + mul_3qi;
1466     sum_3qr = mul_1qr - mul_3qr;
1467     sum_3qi = mul_1qi - mul_3qi;
1468 
1469     ptr_y[0] = sum_0qr + (sum_2qr * 2);
1470     ptr_y[1] = sum_0qi + (sum_2qi * 2);
1471     ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1472     ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1473     ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1474     ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1475     ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1476     ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1477 
1478     ptr_y += 2;
1479     ptr_x += 16;
1480 
1481     /* 4th butter fly */
1482 
1483     inp_0qr = ptr_x[0];
1484     inp_0qi = ptr_x[1];
1485     inp_1qr = ptr_x[4];
1486     inp_1qi = ptr_x[5];
1487     inp_2qr = ptr_x[8];
1488     inp_2qi = ptr_x[9];
1489     inp_3qr = ptr_x[12];
1490     inp_3qi = ptr_x[13];
1491 
1492     mul_0qr = inp_0qr;
1493     mul_0qi = inp_0qi;
1494 
1495     mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1496     mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1497 
1498     mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1499     mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1500 
1501     mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1502     mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1503 
1504     sum_0qr = mul_0qr + (mul_2qr * 2);
1505     sum_0qi = mul_0qi + (mul_2qi * 2);
1506     sum_1qr = mul_0qr - (mul_2qr * 2);
1507     sum_1qi = mul_0qi - (mul_2qi * 2);
1508 
1509     sum_2qr = mul_1qr + mul_3qr;
1510     sum_2qi = mul_1qi + mul_3qi;
1511     sum_3qr = mul_1qr - mul_3qr;
1512     sum_3qi = mul_1qi - mul_3qi;
1513 
1514     ptr_y[0] = sum_0qr + (sum_2qr * 2);
1515     ptr_y[1] = sum_0qi + (sum_2qi * 2);
1516     ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1517     ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1518     ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1519     ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1520     ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1521     ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1522 
1523     ptr_x = ptr_in;
1524     ptr_y = &interm_y[32];
1525 
1526     /* Butter fly summation in 2 steps */
1527     inp_0qr = ptr_x[2];
1528     inp_0qi = ptr_x[3];
1529     inp_1qr = ptr_x[6];
1530     inp_1qi = ptr_x[7];
1531     inp_2qr = ptr_x[10];
1532     inp_2qi = ptr_x[11];
1533     inp_3qr = ptr_x[14];
1534     inp_3qi = ptr_x[15];
1535 
1536     mul_0qr = inp_0qr;
1537     mul_0qi = inp_0qi;
1538     mul_1qr = inp_1qr;
1539     mul_1qi = inp_1qi;
1540     mul_2qr = inp_2qr;
1541     mul_2qi = inp_2qi;
1542     mul_3qr = inp_3qr;
1543     mul_3qi = inp_3qi;
1544 
1545     sum_0qr = mul_0qr + mul_2qr;
1546     sum_0qi = mul_0qi + mul_2qi;
1547     sum_1qr = mul_0qr - mul_2qr;
1548     sum_1qi = mul_0qi - mul_2qi;
1549     sum_2qr = mul_1qr + mul_3qr;
1550     sum_2qi = mul_1qi + mul_3qi;
1551     sum_3qr = mul_1qr - mul_3qr;
1552     sum_3qi = mul_1qi - mul_3qi;
1553 
1554     ptr_y[0] = sum_0qr + sum_2qr;
1555     ptr_y[1] = sum_0qi + sum_2qi;
1556     ptr_y[h2] = sum_1qr + sum_3qi;
1557     ptr_y[h2 + 1] = sum_1qi - sum_3qr;
1558     ptr_y[l1] = sum_0qr - sum_2qr;
1559     ptr_y[l1 + 1] = sum_0qi - sum_2qi;
1560     ptr_y[l2] = sum_1qr - sum_3qi;
1561     ptr_y[l2 + 1] = sum_1qi + sum_3qr;
1562 
1563     ptr_y += 2;
1564     ptr_x += 16;
1565 
1566     /* 2nd butter fly */
1567 
1568     inp_0qr = ptr_x[2];
1569     inp_0qi = ptr_x[3];
1570     inp_1qr = ptr_x[6];
1571     inp_1qi = ptr_x[7];
1572     inp_2qr = ptr_x[10];
1573     inp_2qi = ptr_x[11];
1574     inp_3qr = ptr_x[14];
1575     inp_3qi = ptr_x[15];
1576 
1577     mul_0qr = inp_0qr;
1578     mul_0qi = inp_0qi;
1579 
1580     mul_1qr = (inp_1qr * 0.461929321f) + (inp_1qi * 0.191329956f);
1581     mul_1qi = (inp_1qr * -0.191329956f) + (inp_1qi * 0.461929321f);
1582 
1583     mul_2qr = ((inp_2qr + inp_2qi) * 0.353546143f);
1584     mul_2qi = ((-inp_2qr + inp_2qi) * 0.353546143f);
1585 
1586     mul_3qr = (inp_3qr * 0.191329956f) + (inp_3qi * 0.461929321f);
1587     mul_3qi = (inp_3qr * -0.461929321f) + (inp_3qi * 0.191329956f);
1588 
1589     sum_0qr = mul_0qr + (mul_2qr * 2);
1590     sum_0qi = mul_0qi + (mul_2qi * 2);
1591     sum_1qr = mul_0qr - (mul_2qr * 2);
1592     sum_1qi = mul_0qi - (mul_2qi * 2);
1593 
1594     sum_2qr = mul_1qr + mul_3qr;
1595     sum_2qi = mul_1qi + mul_3qi;
1596     sum_3qr = mul_1qr - mul_3qr;
1597     sum_3qi = mul_1qi - mul_3qi;
1598 
1599     ptr_y[0] = sum_0qr + (sum_2qr * 2);
1600     ptr_y[1] = sum_0qi + (sum_2qi * 2);
1601     ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1602     ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1603     ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1604     ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1605     ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1606     ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1607 
1608     ptr_y += 2;
1609     ptr_x += 16;
1610 
1611     /* 3rd butter fly */
1612 
1613     inp_0qr = ptr_x[2];
1614     inp_0qi = ptr_x[3];
1615     inp_1qr = ptr_x[6];
1616     inp_1qi = ptr_x[7];
1617     inp_2qr = ptr_x[10];
1618     inp_2qi = ptr_x[11];
1619     inp_3qr = ptr_x[14];
1620     inp_3qi = ptr_x[15];
1621 
1622     mul_0qr = inp_0qr;
1623     mul_0qi = inp_0qi;
1624 
1625     mul_1qr = ((inp_1qr + inp_1qi) * 0.353546143f);
1626     mul_1qi = ((-inp_1qr + inp_1qi) * 0.353546143f);
1627 
1628     mul_2qr = inp_2qi;
1629     mul_2qi = inp_2qr;
1630 
1631     mul_3qr = ((-inp_3qr + inp_3qi) * 0.353546143f);
1632     mul_3qi = ((inp_3qr + inp_3qi) * -0.353546143f);
1633 
1634     sum_0qr = mul_0qr + mul_2qr;
1635     sum_0qi = mul_0qi - mul_2qi;
1636     sum_1qr = mul_0qr - mul_2qr;
1637     sum_1qi = mul_0qi + mul_2qi;
1638     sum_2qr = mul_1qr + mul_3qr;
1639     sum_2qi = mul_1qi + mul_3qi;
1640     sum_3qr = mul_1qr - mul_3qr;
1641     sum_3qi = mul_1qi - mul_3qi;
1642 
1643     ptr_y[0] = sum_0qr + (sum_2qr * 2);
1644     ptr_y[1] = sum_0qi + (sum_2qi * 2);
1645     ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1646     ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1647     ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1648     ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1649     ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1650     ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1651 
1652     ptr_y += 2;
1653     ptr_x += 16;
1654 
1655     /* 4th butter fly */
1656 
1657     inp_0qr = ptr_x[2];
1658     inp_0qi = ptr_x[3];
1659     inp_1qr = ptr_x[6];
1660     inp_1qi = ptr_x[7];
1661     inp_2qr = ptr_x[10];
1662     inp_2qi = ptr_x[11];
1663     inp_3qr = ptr_x[14];
1664     inp_3qi = ptr_x[15];
1665 
1666     mul_0qr = inp_0qr;
1667     mul_0qi = inp_0qi;
1668 
1669     mul_1qr = (inp_1qr * 0.191329956f) + (inp_1qi * 0.461929321f);
1670     mul_1qi = (inp_1qr * -0.461929321f) + (inp_1qi * 0.191329956f);
1671 
1672     mul_2qr = ((-inp_2qr + inp_2qi) * 0.353546143f);
1673     mul_2qi = ((inp_2qr + inp_2qi) * -0.353546143f);
1674 
1675     mul_3qr = (inp_3qr * -0.461929321f) + (inp_3qi * -0.191329956f);
1676     mul_3qi = (inp_3qr * 0.191329956f) + (inp_3qi * -0.461929321f);
1677 
1678     sum_0qr = mul_0qr + (mul_2qr * 2);
1679     sum_0qi = mul_0qi + (mul_2qi * 2);
1680     sum_1qr = mul_0qr - (mul_2qr * 2);
1681     sum_1qi = mul_0qi - (mul_2qi * 2);
1682 
1683     sum_2qr = mul_1qr + mul_3qr;
1684     sum_2qi = mul_1qi + mul_3qi;
1685     sum_3qr = mul_1qr - mul_3qr;
1686     sum_3qi = mul_1qi - mul_3qi;
1687 
1688     ptr_y[0] = sum_0qr + (sum_2qr * 2);
1689     ptr_y[1] = sum_0qi + (sum_2qi * 2);
1690     ptr_y[h2] = sum_1qr + (sum_3qi * 2);
1691     ptr_y[h2 + 1] = sum_1qi - (sum_3qr * 2);
1692     ptr_y[l1] = sum_0qr - (sum_2qr * 2);
1693     ptr_y[l1 + 1] = sum_0qi - (sum_2qi * 2);
1694     ptr_y[l2] = sum_1qr - (sum_3qi * 2);
1695     ptr_y[l2 + 1] = sum_1qi + (sum_3qr * 2);
1696   }
1697 
1698   // Last stage of 32 point FFT
1699   {
1700     ptr_y = ptr_out;
1701     ptr_y[idx1] = interm_y[0] + interm_y[32];
1702     ptr_y[idx1 + 1] = interm_y[1] + interm_y[33];
1703     ptr_y[idx2] = interm_y[0] - interm_y[32];
1704     ptr_y[idx2 + 1] = interm_y[1] - interm_y[33];
1705     idx1 += FFT15X2;
1706     idx2 += FFT15X2;
1707     for (i = 1; i < FFT16; i++) {
1708       mul_r = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]) -
1709               (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]);
1710       mul_i = (interm_y[FFT32 + 2 * i + 0] * ixheaace_fft_mix_rad_twid_tbl_h_32[i - 1]) +
1711               (interm_y[FFT32 + 2 * i + 1] * ixheaace_fft_mix_rad_twid_tbl_32[i - 1]);
1712 
1713       mul_r = mul_r / 2;
1714       mul_i = mul_i / 2;
1715       ptr_y[idx1] = interm_y[2 * i + 0] + (mul_r * 2);
1716       ptr_y[idx1 + 1] = interm_y[2 * i + 1] + (mul_i * 2);
1717       ptr_y[idx2] = interm_y[2 * i + 0] - (mul_r * 2);
1718       ptr_y[idx2 + 1] = interm_y[2 * i + 1] - (mul_i * 2);
1719       idx1 += FFT15X2;
1720       idx2 += FFT15X2;
1721     }
1722   }
1723 }
1724 
ixheaace_dec_rearrange_short_flt(FLOAT32 * ptr_in,FLOAT32 * ptr_out,WORD32 N,const WORD16 * ptr_re_arr_tab)1725 static VOID ixheaace_dec_rearrange_short_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out, WORD32 N,
1726                                              const WORD16 *ptr_re_arr_tab) {
1727   WORD32 n, i = 0;
1728 
1729   for (n = 0; n < N; n++) {
1730     WORD32 idx = ptr_re_arr_tab[n] << 1;
1731     ptr_out[i++] = ptr_in[idx];
1732     ptr_out[i++] = ptr_in[idx + 1];
1733   }
1734 }
1735 
ixheaace_fft_5_flt(FLOAT32 * ptr_in,FLOAT32 * ptr_out)1736 static VOID ixheaace_fft_5_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1737   FLOAT32 C51 = 0.951056516f;
1738   FLOAT32 C52 = -0.769420885f;
1739   FLOAT32 C53 = -0.363271264f;
1740   FLOAT32 C54 = 0.559016994f;
1741   FLOAT32 C55 = -0.625f;
1742 
1743   FLOAT32 r1, r2, r3, r4;
1744   FLOAT32 s1, s2, s3, s4, t, temp1, temp2;
1745 
1746   r1 = (ptr_in[2] + ptr_in[8]);
1747   r4 = (ptr_in[2] - ptr_in[8]);
1748   r3 = (ptr_in[4] + ptr_in[6]);
1749   r2 = (ptr_in[4] - ptr_in[6]);
1750 
1751   t = ((r1 - r3) * C54);
1752   r1 = (r1 + r3);
1753 
1754   temp1 = (ptr_in[0] + r1);
1755   r1 = (temp1 + (((r1 * C55)) * 2));
1756 
1757   r3 = (r1 - t);
1758   r1 = (r1 + t);
1759 
1760   t = ((r4 + r2) * C51);
1761   r4 = (t + ((r4 * C52) * 2));
1762   r2 = (t + (r2 * C53));
1763 
1764   s1 = (ptr_in[3] + ptr_in[9]);
1765   s4 = (ptr_in[3] - ptr_in[9]);
1766   s3 = (ptr_in[5] + ptr_in[7]);
1767   s2 = (ptr_in[5] - ptr_in[7]);
1768 
1769   t = ((s1 - s3) * C54);
1770   s1 = (s1 + s3);
1771 
1772   temp2 = (ptr_in[1] + s1);
1773 
1774   s1 = (temp2 + (((s1 * C55)) * 2));
1775 
1776   s3 = (s1 - t);
1777   s1 = (s1 + t);
1778 
1779   t = ((s4 + s2) * C51);
1780   s4 = (t + (((s4 * C52)) * 2));
1781   s2 = (t + ((s2 * C53)));
1782 
1783   ptr_out[0] = temp1;
1784   ptr_out[1] = temp2;
1785   ptr_out[2] = (r1 + s2);
1786   ptr_out[3] = (s1 - r2);
1787   ptr_out[4] = (r3 - s4);
1788   ptr_out[5] = (s3 + r4);
1789   ptr_out[6] = (r3 + s4);
1790   ptr_out[7] = (s3 - r4);
1791   ptr_out[8] = (r1 - s2);
1792   ptr_out[9] = (s1 + r2);
1793 }
1794 
ixheaace_fft_3_flt(FLOAT32 * ptr_in,FLOAT32 * ptr_out)1795 static VOID ixheaace_fft_3_flt(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
1796   FLOAT32 add_r, sub_r;
1797   FLOAT32 add_i, sub_i;
1798   FLOAT32 x_01_r, x_01_i, temp;
1799 
1800   FLOAT32 p1, p2, p3, p4;
1801   FLOAT32 sinmu = 0.866025404f;
1802 
1803   x_01_r = (ptr_in[0] + ptr_in[2]);
1804   x_01_i = (ptr_in[1] + ptr_in[3]);
1805 
1806   add_r = (ptr_in[2] + ptr_in[4]);
1807   add_i = (ptr_in[3] + ptr_in[5]);
1808 
1809   sub_r = (ptr_in[2] - ptr_in[4]);
1810   sub_i = (ptr_in[3] - ptr_in[5]);
1811 
1812   p1 = add_r / 2;
1813   p2 = (sub_i * sinmu);
1814   p3 = (sub_r * sinmu);
1815   p4 = add_i / 2;
1816 
1817   temp = (ptr_in[0] - p1);
1818 
1819   ptr_out[0] = (x_01_r + ptr_in[4]);
1820   ptr_out[1] = (x_01_i + ptr_in[5]);
1821   ptr_out[2] = (temp + p2);
1822   ptr_out[3] = ((ptr_in[1] - p3) - p4);
1823   ptr_out[4] = (temp - p2);
1824   ptr_out[5] = ((ptr_in[1] + p3) - p4);
1825 }
1826 
ixheaace_pre_twiddle_120(FLOAT32 * ptr_in,FLOAT32 * ptr_data,WORD32 n,const FLOAT32 * ptr_cos_sin_tbl)1827 static VOID ixheaace_pre_twiddle_120(FLOAT32 *ptr_in, FLOAT32 *ptr_data, WORD32 n,
1828                                      const FLOAT32 *ptr_cos_sin_tbl) {
1829   WORD npoints_4, i;
1830   FLOAT32 tempr, tempi, temp;
1831   FLOAT32 c, c1, s, s1;
1832   FLOAT32 *ptr_in1, *ptr_in2;
1833   FLOAT32 *ptr_x = ptr_in + (n - 1);
1834 
1835   npoints_4 = n >> 2;
1836 
1837   ptr_in1 = ptr_data;
1838   ptr_in2 = ptr_data + n - 1;
1839 
1840   for (i = 0; i < npoints_4; i++) {
1841     c = *ptr_cos_sin_tbl++;
1842     s = *ptr_cos_sin_tbl++;
1843 
1844     tempr = *ptr_in1++;
1845     tempi = *ptr_in2--;
1846 
1847     temp = -((tempr * c) + (tempi * s));
1848     *ptr_in++ = temp;
1849 
1850     temp = -((tempi * c) - (tempr * s));
1851     *ptr_in++ = temp;
1852 
1853     c1 = *ptr_cos_sin_tbl++;
1854     s1 = *ptr_cos_sin_tbl++;
1855 
1856     tempi = *ptr_in1++;
1857     tempr = *ptr_in2--;
1858 
1859     temp = -((tempi * c1) - (tempr * s1));
1860     *ptr_x-- = temp;
1861 
1862     temp = -((tempr * c1) + (tempi * s1));
1863     *ptr_x-- = temp;
1864   }
1865 }
1866 
ixheaace_post_twiddle_120(FLOAT32 * ptr_out,FLOAT32 * ptr_x,const FLOAT32 * ptr_cos_sin_tbl,WORD m)1867 static VOID ixheaace_post_twiddle_120(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
1868                                       const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
1869   WORD i;
1870   FLOAT32 c, c1, s, s1;
1871   FLOAT32 tempr, tempi, temp;
1872   FLOAT32 *ptr_in2 = ptr_x + (m - 1);
1873   FLOAT32 *ptr_in1 = ptr_x;
1874   FLOAT32 *ptr_x1 = ptr_out;
1875   FLOAT32 *ptr_x2 = ptr_out + (m - 1);
1876 
1877   for (i = 0; i < m; i += 4) {
1878     c = *ptr_cos_sin_tbl++;
1879     s = *ptr_cos_sin_tbl++;
1880     c1 = *ptr_cos_sin_tbl++;
1881     s1 = *ptr_cos_sin_tbl++;
1882 
1883     tempr = *ptr_in1++;
1884     tempi = *ptr_in1++;
1885 
1886     temp = -((tempr * s) - (tempi * c));
1887     *ptr_x2-- = temp;
1888 
1889     temp = -((tempr * c) + (tempi * s));
1890     *ptr_x1++ = temp;
1891 
1892     tempi = *ptr_in2--;
1893     tempr = *ptr_in2--;
1894 
1895     temp = -((tempr * s1) - (tempi * c1));
1896     *ptr_x1++ = temp;
1897 
1898     temp = -((tempr * c1) + (tempi * s1));
1899     *ptr_x2-- = temp;
1900   }
1901 }
1902 
ixheaace_fft_960_15(FLOAT32 * ptr_in_flt,FLOAT32 * ptr_out_flt)1903 static VOID ixheaace_fft_960_15(FLOAT32 *ptr_in_flt, FLOAT32 *ptr_out_flt) {
1904   WORD32 i;
1905   FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1906   ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_5);
1907 
1908   ptr_buf1_flt = ptr_out_flt;
1909   ptr_buf2_flt = ptr_in_flt;
1910   for (i = 0; i < FFT3; i++) {
1911     ixheaace_fft_5_flt(ptr_buf1_flt, ptr_buf2_flt);
1912 
1913     ptr_buf1_flt += (FFT5 * 2);
1914     ptr_buf2_flt += (FFT5 * 2);
1915   }
1916 
1917   ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_3);
1918   ptr_buf1_flt = ptr_out_flt;
1919   ptr_buf2_flt = ptr_in_flt;
1920   for (i = 0; i < FFT5; i++) {
1921     ixheaace_fft_3_flt(ptr_buf1_flt, ptr_buf2_flt);
1922 
1923     ptr_buf1_flt += (FFT3 * 2);
1924     ptr_buf2_flt += (FFT3 * 2);
1925   }
1926 
1927   ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, FFT15, re_arr_tab_sml);
1928 }
1929 
ixheaace_fft_120(WORD32 npoints,FLOAT32 * ptr_x_flt,FLOAT32 * ptr_y_flt)1930 static VOID ixheaace_fft_120(WORD32 npoints, FLOAT32 *ptr_x_flt, FLOAT32 *ptr_y_flt) {
1931   WORD32 i;
1932   FLOAT32 *ptr_buf1_flt, *ptr_buf2_flt;
1933   FLOAT32 *ptr_in_flt, *ptr_out_flt;
1934 
1935   ptr_in_flt = ptr_x_flt;
1936   ptr_out_flt = ptr_y_flt;
1937   ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_4);
1938 
1939   ptr_buf1_flt = ptr_out_flt;
1940   ptr_buf2_flt = ptr_in_flt;
1941 
1942   for (i = 0; i < FFT15; i++) {
1943     {
1944       FLOAT32 x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7;
1945       FLOAT32 *y0, *y1, *y2, *y3;
1946       FLOAT32 *x0;
1947       FLOAT32 xh0_0, xh1_0, xh0_1, xh1_1, xl0_0, xl1_0, xl0_1, xl1_1;
1948       WORD32 h2;
1949       FLOAT32 n00, n01, n10, n11, n20, n21, n30, n31;
1950 
1951       ptr_x_flt = ptr_buf1_flt;
1952       ptr_y_flt = ptr_buf2_flt;
1953       npoints = 4;
1954       h2 = 0;
1955 
1956       y0 = ptr_y_flt;
1957       y2 = ptr_y_flt + (WORD32)npoints;
1958       x0 = ptr_x_flt;
1959       y1 = y0 + (WORD32)(npoints >> 1);
1960       y3 = y2 + (WORD32)(npoints >> 1);
1961 
1962       x_0 = x0[0];
1963       x_1 = x0[1];
1964       x_2 = x0[2];
1965       x_3 = x0[3];
1966       x_4 = x0[4];
1967       x_5 = x0[5];
1968       x_6 = x0[6];
1969       x_7 = x0[7];
1970 
1971       xh0_0 = x_0 + x_4;
1972       xh1_0 = x_1 + x_5;
1973       xl0_0 = x_0 - x_4;
1974       xl1_0 = x_1 - x_5;
1975       xh0_1 = x_2 + x_6;
1976       xh1_1 = x_3 + x_7;
1977       xl0_1 = x_2 - x_6;
1978       xl1_1 = x_3 - x_7;
1979 
1980       n00 = xh0_0 + xh0_1;
1981       n01 = xh1_0 + xh1_1;
1982       n10 = xl0_0 + xl1_1;
1983       n11 = xl1_0 - xl0_1;
1984       n20 = xh0_0 - xh0_1;
1985       n21 = xh1_0 - xh1_1;
1986       n30 = xl0_0 - xl1_1;
1987       n31 = xl1_0 + xl0_1;
1988 
1989       y0[2 * h2] = n00;
1990       y0[2 * h2 + 1] = n01;
1991       y1[2 * h2] = n10;
1992       y1[2 * h2 + 1] = n11;
1993       y2[2 * h2] = n20;
1994       y2[2 * h2 + 1] = n21;
1995       y3[2 * h2] = n30;
1996       y3[2 * h2 + 1] = n31;
1997     }
1998 
1999     ptr_buf1_flt += (FFT4 * 2);
2000     ptr_buf2_flt += (FFT4 * 2);
2001   }
2002 
2003   ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_15_4);
2004 
2005   ptr_buf1_flt = ptr_out_flt;
2006   ptr_buf2_flt = ptr_in_flt;
2007   for (i = 0; i < FFT4; i++) {
2008     ixheaace_fft_960_15(ptr_buf1_flt, ptr_buf2_flt);
2009     ptr_buf1_flt += (FFT15 * 2);
2010     ptr_buf2_flt += (FFT15 * 2);
2011   }
2012 
2013   ixheaace_dec_rearrange_short_flt(ptr_in_flt, ptr_out_flt, 60, re_arr_tab_120);
2014 }
2015 
ixheaace_cfft_480(FLOAT32 * ptr_inp,FLOAT32 * ptr_op)2016 static VOID ixheaace_cfft_480(FLOAT32 *ptr_inp, FLOAT32 *ptr_op) {
2017   WORD32 i;
2018   FLOAT32 *ptr_buf1, *ptr_buf2;
2019   FLOAT32 fft5_out[FFT15X2] = {0};
2020 
2021   ptr_buf1 = ptr_inp;
2022   ptr_buf2 = ptr_op;
2023 
2024   for (i = 0; i < FFT32; i++) {
2025     ixheaace_cfft_15_480(ptr_buf1, ptr_buf2, &fft5_out[0]);
2026     ptr_buf1 += 2;
2027     ptr_buf2 += 2;
2028   }
2029 
2030   ixheaace_cfft_twiddle_mult(ptr_op, ptr_inp, FFT15, FFT32, ixheaace_fft_mix_rad_twid_tbl_480,
2031                              ixheaace_fft_mix_rad_twid_h_tbl_480);
2032 
2033   ptr_buf1 = ptr_inp;
2034   ptr_buf2 = ptr_op;
2035 
2036   for (i = 0; i < FFT15; i++) {
2037     ixheaace_cfft_32_480(ptr_buf1, ptr_buf2);
2038     ptr_buf1 += (FFT32X2);
2039     ptr_buf2 += 2;
2040   }
2041 }
2042 
ixheaace_pre_twiddle_960(FLOAT32 * ptr_x,FLOAT32 * ptr_data,WORD32 n,const FLOAT32 * ptr_cos_sin_tbl)2043 static VOID ixheaace_pre_twiddle_960(FLOAT32 *ptr_x, FLOAT32 *ptr_data, WORD32 n,
2044                                      const FLOAT32 *ptr_cos_sin_tbl) {
2045   WORD npoints_4, i;
2046   FLOAT32 tempr, tempi, temp;
2047   FLOAT32 c, c1, s, s1;
2048   FLOAT32 *ptr_in_1, *ptr_in_2;
2049   FLOAT32 *ptr_x_1 = ptr_x + (n - 1);
2050 
2051   npoints_4 = n >> 2;
2052 
2053   ptr_in_1 = ptr_data;
2054   ptr_in_2 = ptr_data + n - 1;
2055 
2056   for (i = 0; i < npoints_4; i++) {
2057     c = *ptr_cos_sin_tbl++;
2058     s = *ptr_cos_sin_tbl++;
2059 
2060     tempr = *ptr_in_1++;
2061     tempi = *ptr_in_2--;
2062 
2063     temp = -((tempr * c) + (tempi * s));
2064     *ptr_x++ = temp;
2065 
2066     temp = -((tempi * c) - (tempr * s));
2067     *ptr_x++ = temp;
2068 
2069     c1 = *ptr_cos_sin_tbl++;
2070     s1 = *ptr_cos_sin_tbl++;
2071 
2072     tempi = *ptr_in_1++;
2073     tempr = *ptr_in_2--;
2074 
2075     temp = -((tempi * c1) - (tempr * s1));
2076     *ptr_x_1-- = temp;
2077 
2078     temp = -((tempr * c1) + (tempi * s1));
2079     *ptr_x_1-- = temp;
2080   }
2081 }
2082 
ixheaace_post_twiddle_960(FLOAT32 * ptr_out,FLOAT32 * ptr_x,const FLOAT32 * ptr_cos_sin_tbl,WORD m)2083 static VOID ixheaace_post_twiddle_960(FLOAT32 *ptr_out, FLOAT32 *ptr_x,
2084                                       const FLOAT32 *ptr_cos_sin_tbl, WORD m) {
2085   WORD i;
2086   FLOAT32 c, c1, s, s1;
2087   FLOAT32 tempr, tempi, temp;
2088   FLOAT32 *ptr_in2 = ptr_x + (m - 1);
2089   FLOAT32 *ptr_in1 = ptr_x;
2090   FLOAT32 *ptr_x1 = ptr_out;
2091   FLOAT32 *ptr_x2 = ptr_out + (m - 1);
2092 
2093   for (i = 0; i < m; i += 4) {
2094     c = *ptr_cos_sin_tbl++;
2095     s = *ptr_cos_sin_tbl++;
2096     c1 = *ptr_cos_sin_tbl++;
2097     s1 = *ptr_cos_sin_tbl++;
2098 
2099     tempr = *ptr_in1++;
2100     tempi = *ptr_in1++;
2101 
2102     temp = -((tempr * s) - (tempi * c));
2103     *ptr_x2-- = temp;
2104 
2105     temp = -((tempr * c) + (tempi * s));
2106     *ptr_x1++ = temp;
2107 
2108     tempi = *ptr_in2--;
2109     tempr = *ptr_in2--;
2110 
2111     temp = -((tempr * s1) - (tempi * c1));
2112     *ptr_x1++ = temp;
2113 
2114     temp = -((tempr * c1) + (tempi * s1));
2115     *ptr_x2-- = temp;
2116   }
2117 }
2118 
ixheaace_mdct_960(FLOAT32 * ptr_input_flt,WORD8 * ptr_scratch)2119 static VOID ixheaace_mdct_960(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2120   FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2121   FLOAT32 const_mult_fac = 3.142857143f;
2122   FLOAT32 *ptr_data = ptr_input_flt;
2123   WORD32 k;
2124 
2125   memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * FRAME_LEN_960);
2126   ixheaace_pre_twiddle_960(ptr_input_flt, ptr_scratch_flt, FRAME_LEN_960, cos_sin_table_flt);
2127 
2128   ixheaace_cfft_480(ptr_input_flt, ptr_scratch_flt);
2129 
2130   ixheaace_post_twiddle_960(ptr_input_flt, ptr_scratch_flt, cos_sin_table_flt, FRAME_LEN_960);
2131 
2132   for (k = FRAME_LEN_960 - 1; k >= 0; k -= 2) {
2133     *ptr_data = (*ptr_data * const_mult_fac);
2134     ptr_data++;
2135     *ptr_data = (*ptr_data * const_mult_fac);
2136     ptr_data++;
2137   }
2138 }
2139 
ixheaace_mdct_120(FLOAT32 * ptr_input_flt,WORD8 * ptr_scratch)2140 static VOID ixheaace_mdct_120(FLOAT32 *ptr_input_flt, WORD8 *ptr_scratch) {
2141   WORD32 n, k;
2142   WORD32 n_by_2;
2143   FLOAT32 *ptr_scratch_flt = (FLOAT32 *)ptr_scratch;
2144   FLOAT32 const_mltfac = 3.142857143f;
2145   FLOAT32 *ptr_data = ptr_input_flt;
2146   n = 120;
2147   n_by_2 = n >> 1;
2148   memcpy(ptr_scratch_flt, ptr_input_flt, sizeof(*ptr_scratch_flt) * n);
2149 
2150   ixheaace_pre_twiddle_120(ptr_input_flt, ptr_scratch_flt, n, ixheaace_cosine_array_240);
2151 
2152   ixheaace_fft_120(n_by_2, ptr_input_flt, ptr_scratch_flt);
2153 
2154   ixheaace_post_twiddle_120(ptr_input_flt, ptr_scratch_flt, ixheaace_cosine_array_240, n);
2155 
2156   for (k = n - 1; k >= 0; k -= 2) {
2157     *ptr_data = (*ptr_data * const_mltfac);
2158     ptr_data++;
2159     *ptr_data = (*ptr_data * const_mltfac);
2160     ptr_data++;
2161   }
2162 }
2163 
ixheaace_mdct(FLOAT32 * ptr_dct_data,const FLOAT32 * ptr_trig_data,const FLOAT32 * ptr_sine_window,WORD32 n,WORD32 ld_n,WORD8 * ptr_scratch)2164 static VOID ixheaace_mdct(FLOAT32 *ptr_dct_data, const FLOAT32 *ptr_trig_data,
2165                           const FLOAT32 *ptr_sine_window, WORD32 n, WORD32 ld_n,
2166                           WORD8 *ptr_scratch) {
2167   ixheaace_pre_mdct(ptr_dct_data, n, ptr_sine_window);
2168 
2169   ixheaace_scratch_mem *pstr_scratch = (ixheaace_scratch_mem *)ptr_scratch;
2170   ia_enhaacplus_enc_complex_fft(ptr_dct_data, n / 2, pstr_scratch);
2171 
2172   ixheaace_post_mdct(ptr_dct_data, n, ptr_trig_data,
2173                      1 << (LD_FFT_TWIDDLE_TABLE_SIZE - (ld_n - 1)), FFT_TWIDDLE_TABLE_SIZE);
2174 }
2175 
ixheaace_shift_mdct_delay_buffer(FLOAT32 * ptr_mdct_delay_buffer,const FLOAT32 * ptr_time_signal,WORD32 ch_increment,WORD32 frame_len)2176 static VOID ixheaace_shift_mdct_delay_buffer(FLOAT32 *ptr_mdct_delay_buffer,
2177                                              const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2178                                              WORD32 frame_len) {
2179   WORD32 i;
2180   WORD32 blk_switch_offset = frame_len;
2181   switch (frame_len) {
2182     case FRAME_LEN_1024:
2183       blk_switch_offset = BLK_SWITCH_OFFSET_LC_128;
2184       memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2185               (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2186       break;
2187 
2188     case FRAME_LEN_960:
2189       blk_switch_offset = BLK_SWITCH_OFFSET_LC_120;
2190       memmove(ptr_mdct_delay_buffer, ptr_mdct_delay_buffer + frame_len,
2191               (blk_switch_offset - frame_len) * sizeof(*ptr_mdct_delay_buffer));
2192       break;
2193 
2194     case FRAME_LEN_512:
2195     case FRAME_LEN_480:
2196       blk_switch_offset = frame_len;
2197       break;
2198   }
2199 
2200   for (i = 0; i < frame_len; i++) {
2201     ptr_mdct_delay_buffer[blk_switch_offset - frame_len + i] = ptr_time_signal[i * ch_increment];
2202   }
2203 }
2204 
ixheaace_transform_real_lc_ld(FLOAT32 * ptr_mdct_delay_buffer,const FLOAT32 * ptr_time_signal,WORD32 ch_increment,FLOAT32 * ptr_real_out,WORD32 block_type,WORD32 frame_len,WORD8 * ptr_scratch)2205 VOID ixheaace_transform_real_lc_ld(FLOAT32 *ptr_mdct_delay_buffer, const FLOAT32 *ptr_time_signal,
2206                                    WORD32 ch_increment, FLOAT32 *ptr_real_out, WORD32 block_type,
2207                                    WORD32 frame_len, WORD8 *ptr_scratch) {
2208   WORD32 i, w;
2209   FLOAT32 ws1, ws2;
2210   FLOAT32 *ptr_dct_in;
2211   WORD32 frame_len_short = FRAME_LEN_SHORT_128;
2212   WORD32 ls_trans = LS_TRANS_128;
2213   WORD32 trans_offset = TRANSFORM_OFFSET_SHORT_128;
2214   const FLOAT32 *ptr_window;
2215   if (frame_len == FRAME_LEN_960) {
2216     ls_trans = LS_TRANS_120;
2217     trans_offset = TRANSFORM_OFFSET_SHORT_120;
2218     frame_len_short = FRAME_LEN_SHORT_120;
2219   }
2220   switch (block_type) {
2221     case LONG_WINDOW:
2222       ptr_dct_in = ptr_real_out;
2223       ptr_window = &long_window_KBD[0];
2224       switch (frame_len) {
2225         case FRAME_LEN_1024:
2226           ptr_window = &long_window_KBD[0];
2227           break;
2228 
2229         case FRAME_LEN_960:
2230           ptr_window = &long_window_sine_960[0];
2231           break;
2232 
2233         case FRAME_LEN_512:
2234           ptr_window = &long_window_sine_ld[0];
2235           break;
2236 
2237         case FRAME_LEN_480:
2238           ptr_window = &long_window_sine_ld_480[0];
2239           break;
2240       }
2241       for (i = 0; i < frame_len / 2; i++) {
2242         ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2243 
2244         ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2245 
2246         ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2247       }
2248 
2249       ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2250                                        frame_len);
2251 
2252       for (i = 0; i < frame_len / 2; i++) {
2253         ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2254 
2255         ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2256 
2257         ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2258       }
2259       switch (frame_len) {
2260         case FRAME_LEN_1024:
2261           ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10,
2262                         ptr_scratch);
2263           break;
2264 
2265         case FRAME_LEN_960:
2266           ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2267           break;
2268 
2269         case FRAME_LEN_512:
2270         case FRAME_LEN_480:
2271           ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, ptr_window, frame_len, 9, ptr_scratch);
2272           break;
2273       }
2274       break;
2275 
2276     case START_WINDOW:
2277       ptr_dct_in = ptr_real_out;
2278       ptr_window = &long_window_KBD[0];
2279       switch (frame_len) {
2280         case FRAME_LEN_1024:
2281           ptr_window = &long_window_KBD[0];
2282           break;
2283 
2284         case FRAME_LEN_960:
2285           ptr_window = &long_window_sine_960[0];
2286           break;
2287       }
2288       for (i = 0; i < frame_len / 2; i++) {
2289         ws1 = ptr_mdct_delay_buffer[i] * ptr_window[i];
2290 
2291         ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[frame_len - i - 1];
2292 
2293         ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2294       }
2295 
2296       ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2297                                        frame_len);
2298 
2299       if (frame_len == FRAME_LEN_1024) {
2300         ptr_window = &short_window_sine[0];
2301       } else if (frame_len == FRAME_LEN_960) {
2302         ptr_window = &short_window_sine_120[0];
2303       }
2304       for (i = 0; i < ls_trans; i++) {
2305         ws1 = ptr_mdct_delay_buffer[i];
2306         ws2 = 0.0f;
2307 
2308         ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2309       }
2310 
2311       for (i = 0; i < frame_len_short / 2; i++) {
2312         ws1 = ptr_mdct_delay_buffer[i + ls_trans] * ptr_window[frame_len_short - i - 1];
2313 
2314         ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1 - ls_trans)] * ptr_window[i];
2315 
2316         ptr_dct_in[frame_len / 2 - i - 1 - ls_trans] = -(ws1 + ws2);
2317       }
2318       if (frame_len == FRAME_LEN_960) {
2319         ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2320       } else {
2321         ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2322       }
2323 
2324       break;
2325 
2326     case STOP_WINDOW:
2327       ptr_window = &long_window_KBD[0];
2328       ptr_dct_in = ptr_real_out;
2329       if (frame_len == FRAME_LEN_1024) {
2330         ptr_window = &short_window_sine[0];
2331       } else if (frame_len == FRAME_LEN_960) {
2332         ptr_window = &short_window_sine_120[0];
2333       }
2334       for (i = 0; i < ls_trans; i++) {
2335         ws1 = 0.0f;
2336         ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)];
2337         ptr_dct_in[frame_len / 2 + i] = ws1 - ws2;
2338       }
2339 
2340       for (i = 0; i < frame_len_short / 2; i++) {
2341         ws1 = ptr_mdct_delay_buffer[(i + ls_trans)] * ptr_window[i];
2342 
2343         ws2 = ptr_mdct_delay_buffer[(frame_len - ls_trans - i - 1)] *
2344               ptr_window[frame_len_short - i - 1];
2345 
2346         ptr_dct_in[frame_len / 2 + i + ls_trans] = ws1 - ws2;
2347       }
2348 
2349       ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2350                                        frame_len);
2351 
2352       if (frame_len == FRAME_LEN_1024) {
2353         ptr_window = &long_window_KBD[0];
2354       } else if (frame_len == FRAME_LEN_960) {
2355         ptr_window = &long_window_sine_960[0];
2356       }
2357       for (i = 0; i < frame_len / 2; i++) {
2358         ws1 = ptr_mdct_delay_buffer[i] * ptr_window[frame_len - i - 1];
2359 
2360         ws2 = ptr_mdct_delay_buffer[(frame_len - i - 1)] * ptr_window[i];
2361 
2362         ptr_dct_in[frame_len / 2 - i - 1] = -(ws1 + ws2);
2363       }
2364 
2365       if (frame_len == FRAME_LEN_960) {
2366         ixheaace_mdct_960(ptr_dct_in, ptr_scratch);
2367       } else {
2368         ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, long_window_sine, frame_len, 10, ptr_scratch);
2369       }
2370 
2371       break;
2372 
2373     case SHORT_WINDOW:
2374       ptr_window = &short_window_sine[0];
2375       if (frame_len == FRAME_LEN_1024) {
2376         ptr_window = &short_window_sine[0];
2377       } else if (frame_len == FRAME_LEN_960) {
2378         ptr_window = &short_window_sine_120[0];
2379       }
2380       for (w = 0; w < TRANS_FAC; w++) {
2381         ptr_dct_in = ptr_real_out + w * frame_len_short;
2382 
2383         for (i = 0; i < frame_len_short / 2; i++) {
2384           ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + i] * ptr_window[i];
2385 
2386           ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short - i -
2387                                       1] *
2388                 ptr_window[frame_len_short - i - 1];
2389 
2390           ptr_dct_in[frame_len_short / 2 + i] = ws1 - ws2;
2391 
2392           ws1 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short + i] *
2393                 ptr_window[frame_len_short - i - 1];
2394 
2395           ws2 = ptr_mdct_delay_buffer[trans_offset + w * frame_len_short + frame_len_short * 2 -
2396                                       i - 1] *
2397                 ptr_window[i];
2398 
2399           ptr_dct_in[frame_len_short / 2 - i - 1] = -(ws1 + ws2);
2400         }
2401         if (frame_len == FRAME_LEN_960) {
2402           ixheaace_mdct_120(ptr_dct_in, ptr_scratch);
2403         } else {
2404           ixheaace_mdct(ptr_dct_in, fft_twiddle_tab, short_window_sine, frame_len_short, 7,
2405                         ptr_scratch);
2406         }
2407       }
2408 
2409       ixheaace_shift_mdct_delay_buffer(ptr_mdct_delay_buffer, ptr_time_signal, ch_increment,
2410                                        frame_len);
2411       break;
2412   }
2413 }
2414 
ia_enhaacplus_enc_transform_real_eld(FLOAT32 * ptr_mdct_delay_buffer,const FLOAT32 * ptr_time_signal,WORD32 ch_increment,FLOAT32 * ptr_real_out,WORD8 * ptr_shared_buffer5,WORD32 frame_len)2415 VOID ia_enhaacplus_enc_transform_real_eld(FLOAT32 *ptr_mdct_delay_buffer,
2416                                           const FLOAT32 *ptr_time_signal, WORD32 ch_increment,
2417                                           FLOAT32 *ptr_real_out, WORD8 *ptr_shared_buffer5,
2418                                           WORD32 frame_len) {
2419   WORD32 i, loop_len;
2420   FLOAT32 w1, w2;
2421   FLOAT32 *ptr_curr_data, *ptr_prev1_data, *ptr_prev2_data, *ptr_prev3_data;
2422   const FLOAT32 *ptr_win0, *ptr_win1, *ptr_win2, *ptr_win3;
2423 
2424   loop_len = frame_len / 4;
2425 
2426   ptr_curr_data = &ptr_mdct_delay_buffer[3 * frame_len];
2427   ptr_prev1_data = &ptr_mdct_delay_buffer[2 * frame_len];
2428   ptr_prev2_data = &ptr_mdct_delay_buffer[frame_len];
2429   ptr_prev3_data = &ptr_mdct_delay_buffer[0];
2430 
2431   ptr_win0 = &low_delay_window_eld[0];
2432   ptr_win1 = &low_delay_window_eld[frame_len];
2433   ptr_win2 = &low_delay_window_eld[2 * frame_len];
2434   ptr_win3 = &low_delay_window_eld[3 * frame_len];
2435 
2436   memmove(&ptr_mdct_delay_buffer[0], &ptr_mdct_delay_buffer[frame_len],
2437           (3 * frame_len) * sizeof(ptr_mdct_delay_buffer[0]));
2438 
2439   for (i = 0; i < frame_len; i++) {
2440     ptr_curr_data[i] = ptr_time_signal[i * ch_increment];
2441   }
2442 
2443   for (i = 0; i < loop_len; i++) {
2444     w1 = ptr_prev3_data[(frame_len / 2) + loop_len + i] * ptr_win3[(frame_len / 2) - 1 - i];
2445     w1 += ptr_prev3_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win3[(frame_len / 2) + i];
2446 
2447     w2 = (-ptr_prev1_data[(frame_len / 2) + loop_len + i] * ptr_win1[(frame_len / 2) - 1 - i]);
2448     w2 += (-ptr_prev1_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win1[(frame_len / 2) + i]);
2449 
2450     ptr_real_out[i] = w1 + w2;
2451   }
2452 
2453   for (i = 0; i < loop_len; i++) {
2454     w1 = (-ptr_prev2_data[(frame_len / 2) + loop_len + i] * ptr_win2[(frame_len / 2) - 1 - i]);
2455     w1 += ptr_prev2_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win2[(frame_len / 2) + i];
2456 
2457     w2 = ptr_curr_data[(frame_len / 2) + loop_len + i] * ptr_win0[(frame_len / 2) - 1 - i];
2458     w2 += (-ptr_curr_data[(frame_len / 2) + loop_len - 1 - i] * ptr_win0[(frame_len / 2) + i]);
2459 
2460     ptr_real_out[frame_len - 1 - i] = w1 + w2;
2461   }
2462 
2463   for (i = 0; i < loop_len; i++) {
2464     w1 = ptr_prev2_data[loop_len - 1 - i] * ptr_win3[i];
2465     w1 += ptr_prev3_data[loop_len + i] * ptr_win3[frame_len - 1 - i];
2466 
2467     w2 = (-ptr_curr_data[loop_len - 1 - i] * ptr_win1[i]);
2468     w2 += (-ptr_prev1_data[loop_len + i] * ptr_win1[frame_len - 1 - i]);
2469 
2470     ptr_real_out[(frame_len / 2) - 1 - i] = w1 + w2;
2471   }
2472 
2473   for (i = 0; i < loop_len; i++) {
2474     w1 = -(ptr_prev1_data[loop_len - 1 - i] * ptr_win2[i]);
2475     w1 += ptr_prev2_data[loop_len + i] * ptr_win2[frame_len - 1 - i];
2476 
2477     /* First 128 coeffcients are zeros in the window table so they are not used in the code here*/
2478     w2 = (-ptr_curr_data[loop_len + i] * ptr_win0[frame_len - 1 - i]);
2479 
2480     ptr_real_out[(frame_len / 2) + i] = w1 + w2;
2481   }
2482 
2483   ixheaace_mdct(ptr_real_out, fft_twiddle_tab, long_window_sine_ld, frame_len, 9,
2484                 ptr_shared_buffer5);
2485 }
2486