/******************************************************************************
 *
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
#include <string.h>
#include "ixheaac_type_def.h"
#include "ixheaace_adjust_threshold_data.h"
#include "iusace_cnst.h"
#include "iusace_block_switch_const.h"
#include "iusace_rom.h"
#include "iusace_bitbuffer.h"

/* DRC */
#include "impd_drc_common_enc.h"
#include "impd_drc_uni_drc.h"
#include "impd_drc_tables.h"
#include "impd_drc_api.h"
#include "impd_drc_uni_drc_eq.h"
#include "impd_drc_uni_drc_filter_bank.h"
#include "impd_drc_gain_enc.h"
#include "impd_drc_struct_def.h"

#include "iusace_tns_usac.h"
#include "iusace_psy_mod.h"
#include "iusace_config.h"

#include "iusace_fft.h"
#include "iusace_basic_ops_flt.h"
#include "ixheaac_constants.h"
#include "ixheaace_aac_constants.h"
#include "ixheaac_basic_ops32.h"
#include "ixheaace_common_utils.h"
#include "ixheaac_error_standards.h"
#include "ixheaace_error_codes.h"

/* Radix-4 digit (2-bit group) reversal of the loop index i, shifted down by m;
   used to reorder the input for the first butterfly pass. */
#define DIG_REV(i, m, j)                                      \
  do {                                                        \
    unsigned _ = (i);                                         \
    _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2);   \
    _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4);   \
    _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8);   \
    (j) = _ >> (m);                                           \
  } while (0)

/* Number of redundant sign bits (normalization shift) of a 32-bit word. */
static PLATFORM_INLINE WORD8 iusace_calc_norm(WORD32 a) {
  WORD8 norm_val;

  if (a == 0) {
    norm_val = 31;
  } else {
    if (a == (WORD32)0xffffffffL) {
      norm_val = 31;
    } else {
      if (a < 0) {
        a = ~a;
      }
      for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
        a <<= 1;
      }
    }
  }
  return norm_val;
}

/* 3-point complex DFT of interleaved {re, im} data; used by the radix-3
   stage of the mixed-radix FFT. */
static PLATFORM_INLINE VOID iusace_complex_3point_fft(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
  FLOAT32 add_r, sub_r;
  FLOAT32 add_i, sub_i;
  FLOAT32 x01r, x01i, temp;
  FLOAT32 p1, p2, p3, p4;
  FLOAT64 sinmu;

  sinmu = 0.866025403784439;

  x01r = ptr_in[0] + ptr_in[2];
  x01i = ptr_in[1] + ptr_in[3];

  add_r = ptr_in[2] + ptr_in[4];
  add_i = ptr_in[3] + ptr_in[5];

  sub_r = ptr_in[2] - ptr_in[4];
  sub_i = ptr_in[3] - ptr_in[5];

  p1 = add_r / (FLOAT32)2.0;
  p4 = add_i / (FLOAT32)2.0;
  p2 = (FLOAT32)((FLOAT64)sub_i * sinmu);
  p3 = (FLOAT32)((FLOAT64)sub_r * sinmu);

  temp = ptr_in[0] - p1;

  ptr_out[0] = x01r + ptr_in[4];
  ptr_out[1] = x01i + ptr_in[5];
  ptr_out[2] = temp + p2;
  ptr_out[3] = (ptr_in[1] - p3) - p4;
  ptr_out[4] = temp - p2;
  ptr_out[5] = (ptr_in[1] + p3) - p4;

  return;
}

/* In-place complex FFT for power-of-two lengths (mixed radix-4 / radix-2);
   scratch_fft_p2_y is the working buffer for the butterfly passes. */
VOID iusace_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength, FLOAT32 *scratch_fft_p2_y) {
  WORD32 i, j, k, n_stages, h2;
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
  FLOAT32 tmp;
  WORD32 del, nodespacing, in_loop_cnt;
  WORD32 not_power_4;
  WORD32 dig_rev_shift;
  FLOAT32 *y = scratch_fft_p2_y;
  WORD32 mpass = nlength;
  WORD32 npoints = nlength;
  FLOAT32 *ptr_y = y;
  const FLOAT64 *ptr_w;

  dig_rev_shift = iusace_calc_norm(mpass) + 1 - 16;
  n_stages = 30 - iusace_calc_norm(mpass);
  not_power_4 = n_stages & 1;
  n_stages = n_stages >> 1;

  ptr_w = iusace_twiddle_table_fft_32x32;

  if (dig_rev_shift < 0) {
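    /* For short transform lengths the digit-reversal shift computed above can
       be negative; it is clamped to zero below before being used in DIG_REV(). */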
dig_rev_shift = 0; } for (i = 0; i < npoints; i += 4) { FLOAT32 *inp = ptr_x; FLOAT32 tmk; DIG_REV(i, dig_rev_shift, h2); if (not_power_4) { h2 += 1; h2 &= ~1; } inp += (h2); x0r = *inp; x0i = *(inp + 1); inp += (npoints >> 1); x1r = *inp; x1i = *(inp + 1); inp += (npoints >> 1); x2r = *inp; x2i = *(inp + 1); inp += (npoints >> 1); x3r = *inp; x3i = *(inp + 1); x0r = x0r + x2r; x0i = x0i + x2i; tmk = x0r - x2r; x2r = tmk - x2r; tmk = x0i - x2i; x2i = tmk - x2i; x1r = x1r + x3r; x1i = x1i + x3i; tmk = x1r - x3r; x3r = tmk - x3r; tmk = x1i - x3i; x3i = tmk - x3i; x0r = x0r + x1r; x0i = x0i + x1i; tmk = x0r - x1r; x1r = tmk - x1r; tmk = x0i - x1i; x1i = tmk - x1i; x2r = x2r + x3i; x2i = x2i - x3r; tmk = x2r - x3i; x3i = tmk - x3i; tmk = x2i + x3r; x3r = tmk + x3r; *ptr_y++ = x0r; *ptr_y++ = x0i; *ptr_y++ = x2r; *ptr_y++ = x2i; *ptr_y++ = x1r; *ptr_y++ = x1i; *ptr_y++ = x3i; *ptr_y++ = x3r; } ptr_y -= 2 * npoints; del = 4; nodespacing = 64; in_loop_cnt = npoints >> 4; for (i = n_stages - 1; i > 0; i--) { const FLOAT64 *twiddles = ptr_w; FLOAT32 *data = ptr_y; FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6; WORD32 sec_loop_cnt; for (k = in_loop_cnt; k != 0; k--) { x0r = (*data); x0i = (*(data + 1)); data += ((SIZE_T)del << 1); x1r = (*data); x1i = (*(data + 1)); data += ((SIZE_T)del << 1); x2r = (*data); x2i = (*(data + 1)); data += ((SIZE_T)del << 1); x3r = (*data); x3i = (*(data + 1)); data -= 3 * (del << 1); x0r = x0r + x2r; x0i = x0i + x2i; x2r = x0r - (x2r * 2); x2i = x0i - (x2i * 2); x1r = x1r + x3r; x1i = x1i + x3i; x3r = x1r - (x3r * 2); x3i = x1i - (x3i * 2); x0r = x0r + x1r; x0i = x0i + x1i; x1r = x0r - (x1r * 2); x1i = x0i - (x1i * 2); x2r = x2r + x3i; x2i = x2i - x3r; x3i = x2r - (x3i * 2); x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data = ptr_y + 2; sec_loop_cnt = (nodespacing * del); sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - (sec_loop_cnt / 256); for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1)); w_5 = *(twiddles + ((SIZE_T)j << 1) + 257); w_3 = *(twiddles + j + ((SIZE_T)j << 1)); w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 257); for (k = in_loop_cnt; k != 0; k--) { data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4)); x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1); x1r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_2) - ixheaace_dmult((FLOAT64)x2i, w_5)); x2i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r, w_5), (FLOAT64)x2i, w_2); x2r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_3) - ixheaace_dmult((FLOAT64)x3i, w_6)); x3i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r, w_6), (FLOAT64)x3i, w_3); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = x0r + (x2r); x0i = x0i + (x2i); x2r = x0r - (x2r * 2); x2i = x0i - (x2i * 2); x1r = x1r + x3r; x1i = x1i + x3i; x3r = x1r - (x3r * 2); x3i = x1i - (x3i * 2); x0r = x0r + (x1r); x0i = x0i + (x1i); x1r = x0r - (x1r * 2); x1i = x0i - 
(x1i * 2); x2r = x2r + (x3i); x2i = x2i - (x3r); x3i = x2r - (x3i * 2); x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * npoints; data += 2; } for (; j <= (nodespacing * del) >> 1; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1)); w_5 = *(twiddles + ((SIZE_T)j << 1) + 257); w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); for (k = in_loop_cnt; k != 0; k--) { data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4)); x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1); x1r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_2) - ixheaace_dmult((FLOAT64)x2i, w_5)); x2i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r, w_5), (FLOAT64)x2i, w_2); x2r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_6) + ixheaace_dmult((FLOAT64)x3i, w_3)); x3i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6)); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = x0r + (x2r); x0i = x0i + (x2i); x2r = x0r - (x2r * 2); x2i = x0i - (x2i * 2); x1r = x1r + x3r; x1i = x1i + x3i; x3r = x1r - (x3r * 2); x3i = x1i - (x3i * 2); x0r = x0r + (x1r); x0i = x0i + (x1i); x1r = x0r - (x1r * 2); x1i = x0i - (x1i * 2); x2r = x2r + (x3i); x2i = x2i - (x3r); x3i = x2r - (x3i * 2); x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * npoints; data += 2; } for (; j <= sec_loop_cnt * 2; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1) - 256); w_5 = *(twiddles + ((SIZE_T)j << 1) + 1); w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); for (k = in_loop_cnt; k != 0; k--) { data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4)); x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1r, w_4), x1i, w_1); x1r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_5) + ixheaace_dmult((FLOAT64)x2i, w_2)); x2i = (FLOAT32)(-ixheaace_dmult(x2r, w_2) + ixheaace_dmult(x2i, w_5)); x2r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_6) + ixheaace_dmult((FLOAT64)x3i, w_3)); x3i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6)); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = x0r + (x2r); x0i = x0i + (x2i); x2r = x0r - (x2r * 2); x2i = x0i - (x2i * 2); x1r = x1r + x3r; x1i = x1i + x3i; x3r = x1r - (x3r * 2); x3i = x1i - (x3i * 2); x0r = x0r + (x1r); x0i = x0i + (x1i); x1r = x0r - (x1r * 2); x1i = x0i - (x1i * 2); x2r = x2r + (x3i); x2i = x2i - (x3r); x3i = x2r - (x3i * 2); x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; data += 
((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * npoints; data += 2; } for (; j < nodespacing * del; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1) - 256); w_5 = *(twiddles + ((SIZE_T)j << 1) + 1); w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 512); w_6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257); for (k = in_loop_cnt; k != 0; k--) { data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * ((SIZE_T)del << 1); tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4)); x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1); x1r = tmp; tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_5) + ixheaace_dmult((FLOAT64)x2i, w_2)); x2i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x2r, w_2) + ixheaace_dmult((FLOAT64)x2i, w_5)); x2r = tmp; tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6)); x3i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r, w_6), (FLOAT64)x3i, w_3); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = x0r + (x2r); x0i = x0i + (x2i); x2r = x0r - (x2r * 2); x2i = x0i - (x2i * 2); x1r = x1r + x3r; x1i = x1i - x3i; x3r = x1r - (x3r * 2); x3i = x1i + (x3i * 2); x0r = x0r + (x1r); x0i = x0i + (x1i); x1r = x0r - (x1r * 2); x1i = x0i - (x1i * 2); x2r = x2r + (x3i); x2i = x2i - (x3r); x3i = x2r - (x3i * 2); x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * npoints; data += 2; } nodespacing >>= 2; del <<= 2; in_loop_cnt >>= 2; } if (not_power_4) { const FLOAT64 *twiddles = ptr_w; nodespacing <<= 1; for (j = del / 2; j != 0; j--) { FLOAT64 w_1 = *twiddles; FLOAT64 w_4 = *(twiddles + 257); twiddles += nodespacing; x0r = *ptr_y; x0i = *(ptr_y + 1); ptr_y += ((SIZE_T)del << 1); x1r = *ptr_y; x1i = *(ptr_y + 1); tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4)); x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1); x1r = tmp; *ptr_y = (x0r) - (x1r); *(ptr_y + 1) = (x0i) - (x1i); ptr_y -= ((SIZE_T)del << 1); *ptr_y = (x0r) + (x1r); *(ptr_y + 1) = (x0i) + (x1i); ptr_y += 2; } twiddles = ptr_w; for (j = del / 2; j != 0; j--) { FLOAT64 w_1 = *twiddles; FLOAT64 w_4 = *(twiddles + 257); twiddles += nodespacing; x0r = *ptr_y; x0i = *(ptr_y + 1); ptr_y += ((SIZE_T)del << 1); x1r = *ptr_y; x1i = *(ptr_y + 1); tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_4) + ixheaace_dmult((FLOAT64)x1i, w_1)); x1i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1r, w_1) + ixheaace_dmult((FLOAT64)x1i, w_4)); x1r = tmp; *ptr_y = (x0r) - (x1r); *(ptr_y + 1) = (x0i) - (x1i); ptr_y -= ((SIZE_T)del << 1); *ptr_y = (x0r) + (x1r); *(ptr_y + 1) = (x0i) + (x1i); ptr_y += 2; } } for (i = 0; i < nlength; i++) { *(ptr_x + 2 * i) = y[2 * i]; *(ptr_x + 2 * i + 1) = y[2 * i + 1]; } } static VOID iusace_complex_fft_p3(FLOAT32 *data, WORD32 nlength, iusace_scratch_mem *pstr_scratch) { WORD32 i, j; FLOAT32 *data_3 = pstr_scratch->p_fft_p3_data_3; FLOAT32 *y = pstr_scratch->p_fft_p3_y; WORD32 cnfac; WORD32 mpass = 
nlength; FLOAT32 *ptr_x = data; FLOAT32 *ptr_y = y; cnfac = 0; while (mpass % 3 == 0) { mpass /= 3; cnfac++; } for (i = 0; i < 3 * cnfac; i++) { for (j = 0; j < mpass; j++) { data_3[2 * j] = data[3 * (2 * j) + (2 * i)]; data_3[2 * j + 1] = data[3 * (2 * j) + 1 + (2 * i)]; } iusace_complex_fft_p2(data_3, mpass, pstr_scratch->p_fft_p2_y); for (j = 0; j < mpass; j++) { data[3 * (2 * j) + (2 * i)] = data_3[2 * j]; data[3 * (2 * j) + 1 + (2 * i)] = data_3[2 * j + 1]; } } { const FLOAT64 *w1r, *w1i; FLOAT32 tmp; w1r = iusace_twiddle_table_3pr; w1i = iusace_twiddle_table_3pi; for (i = 0; i < nlength; i += 3) { tmp = (FLOAT32)((FLOAT64)data[2 * i] * (*w1r) - (FLOAT64)data[2 * i + 1] * (*w1i)); data[2 * i + 1] = (FLOAT32)((FLOAT64)data[2 * i] * (*w1i) + (FLOAT64)data[2 * i + 1] * (*w1r)); data[2 * i] = tmp; w1r++; w1i++; tmp = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1r) - (FLOAT64)data[2 * (i + 1) + 1] * (*w1i)); data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1i) + (FLOAT64)data[2 * (i + 1) + 1] * (*w1r)); data[2 * (i + 1)] = tmp; w1r++; w1i++; tmp = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1r) - (FLOAT64)data[2 * (i + 2) + 1] * (*w1i)); data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1i) + (FLOAT64)data[2 * (i + 2) + 1] * (*w1r)); data[2 * (i + 2)] = tmp; w1r += 3 * (128 / mpass - 1) + 1; w1i += 3 * (128 / mpass - 1) + 1; } } for (i = 0; i < mpass; i++) { iusace_complex_3point_fft(ptr_x, ptr_y); ptr_x = ptr_x + 6; ptr_y = ptr_y + 6; } for (i = 0; i < mpass; i++) { data[2 * i] = y[6 * i]; data[2 * i + 1] = y[6 * i + 1]; } for (i = 0; i < mpass; i++) { data[2 * (i + mpass)] = y[6 * i + 2]; data[2 * (i + mpass) + 1] = y[6 * i + 3]; } for (i = 0; i < mpass; i++) { data[2 * (i + 2 * mpass)] = y[6 * i + 4]; data[2 * (i + 2 * mpass) + 1] = y[6 * i + 5]; } } VOID iusace_complex_fft_p3_no_scratch(FLOAT32 *data, WORD32 nlength) { WORD32 i, j; FLOAT32 data_3[800]; FLOAT32 y[1024]; FLOAT32 p_fft_p2_y[2048]; WORD32 cnfac; WORD32 mpass = nlength; FLOAT32 *ptr_x = data; FLOAT32 *ptr_y = y; cnfac = 0; while (mpass % 3 == 0) { mpass /= 3; cnfac++; } for (i = 0; i < 3 * cnfac; i++) { for (j = 0; j < mpass; j++) { data_3[2 * j] = data[3 * (2 * j) + (2 * i)]; data_3[2 * j + 1] = data[3 * (2 * j) + 1 + (2 * i)]; } iusace_complex_fft_p2(data_3, mpass, p_fft_p2_y); for (j = 0; j < mpass; j++) { data[3 * (2 * j) + (2 * i)] = data_3[2 * j]; data[3 * (2 * j) + 1 + (2 * i)] = data_3[2 * j + 1]; } } { const FLOAT64 *w1r, *w1i; FLOAT32 tmp; w1r = iusace_twiddle_table_3pr; w1i = iusace_twiddle_table_3pi; for (i = 0; i < nlength; i += 3) { tmp = (FLOAT32)((FLOAT64)data[2 * i] * (*w1r) - (FLOAT64)data[2 * i + 1] * (*w1i)); data[2 * i + 1] = (FLOAT32)((FLOAT64)data[2 * i] * (*w1i) + (FLOAT64)data[2 * i + 1] * (*w1r)); data[2 * i] = tmp; w1r++; w1i++; tmp = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1r) - (FLOAT64)data[2 * (i + 1) + 1] * (*w1i)); data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1i) + (FLOAT64)data[2 * (i + 1) + 1] * (*w1r)); data[2 * (i + 1)] = tmp; w1r++; w1i++; tmp = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1r) - (FLOAT64)data[2 * (i + 2) + 1] * (*w1i)); data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1i) + (FLOAT64)data[2 * (i + 2) + 1] * (*w1r)); data[2 * (i + 2)] = tmp; w1r += 3 * (128 / mpass - 1) + 1; w1i += 3 * (128 / mpass - 1) + 1; } } for (i = 0; i < mpass; i++) { iusace_complex_3point_fft(ptr_x, ptr_y); ptr_x = ptr_x + 6; ptr_y = ptr_y + 6; } for (i = 0; i < mpass; i++) { data[2 * i] = y[6 * i]; data[2 * i + 1] = y[6 * i + 1]; } 
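  /* The two loops below scatter the remaining outputs of each 3-point
     butterfly (y[6 * i + 2 .. 6 * i + 5]) into the middle and upper thirds
     of data[], completing the radix-3 recombination. */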
for (i = 0; i < mpass; i++) { data[2 * (i + mpass)] = y[6 * i + 2]; data[2 * (i + mpass) + 1] = y[6 * i + 3]; } for (i = 0; i < mpass; i++) { data[2 * (i + 2 * mpass)] = y[6 * i + 4]; data[2 * (i + 2 * mpass) + 1] = y[6 * i + 5]; } } static VOID iusace_calc_pre_twid_enc(FLOAT64 *ptr_in, FLOAT32 *fft_ptr, WORD32 npoints, const FLOAT64 *cos_ptr, const FLOAT64 *sin_ptr, const WORD32 tx_flag) { WORD32 i, n; WORD32 b = npoints >> 1; WORD32 a = npoints - b; WORD32 nlength = npoints >> 2; FLOAT64 tempr, tempi; if (tx_flag == 0) { FLOAT64 norm; for (i = 0; i < b; i++) { norm = ptr_in[i]; /* reuse MDCT: spectrally reverse all bins */ ptr_in[i] = ptr_in[npoints - 1 - i]; ptr_in[npoints - 1 - i] = norm; } } for (i = 0; i < nlength; i++) { n = npoints / 2 - 1 - 2 * i; if (i < b / 4) { tempr = ptr_in[a / 2 + n] + ptr_in[npoints + a / 2 - 1 - n]; } else { tempr = ptr_in[a / 2 + n] - ptr_in[a / 2 - 1 - n]; } n = 2 * i; if (i < a / 4) { tempi = ptr_in[a / 2 + n] - ptr_in[a / 2 - 1 - n]; } else { tempi = ptr_in[a / 2 + n] + ptr_in[npoints + a / 2 - 1 - n]; } fft_ptr[2 * i] = (FLOAT32)(tempr * (*cos_ptr) + tempi * (*sin_ptr)); fft_ptr[2 * i + 1] = (FLOAT32)(tempi * (*cos_ptr++) - tempr * (*sin_ptr++)); } } VOID iusace_complex_fft(FLOAT32 *data, WORD32 nlength, iusace_scratch_mem *pstr_scratch) { if (nlength & (nlength - 1)) { iusace_complex_fft_p3(data, nlength, pstr_scratch); } else { iusace_complex_fft_p2(data, nlength, pstr_scratch->p_fft_p2_y); } } static VOID iusace_calc_post_twid_enc(FLOAT64 *ptr_out, FLOAT32 *fft_ptr, WORD32 npoints, const FLOAT64 *cos_ptr, const FLOAT64 *sin_ptr, const WORD32 tx_flag) { WORD32 i; WORD32 nlength = npoints >> 2; FLOAT64 tempr, tempi; /* post-twiddle FFT output and then get output data */ for (i = 0; i < nlength; i++) { tempr = 2 * ((FLOAT64)(fft_ptr[2 * i]) * (*cos_ptr) + (FLOAT64)(fft_ptr[2 * i + 1]) * (*sin_ptr)); tempi = 2 * ((FLOAT64)(fft_ptr[2 * i + 1]) * (*cos_ptr++) - (FLOAT64)(fft_ptr[2 * i]) * (*sin_ptr++)); ptr_out[2 * i] = -tempr; ptr_out[npoints / 2 - 1 - 2 * i] = tempi; ptr_out[npoints / 2 + 2 * i] = -tempi; ptr_out[npoints - 1 - 2 * i] = tempr; } if (tx_flag == 0) { for (i = 0; i < npoints; i += 2) { ptr_out[i] *= -1; /* reuse MDCT: flip signs at odd indices */ } } } IA_ERRORCODE iusace_fft_based_mdct(FLOAT64 *ptr_in, FLOAT64 *ptr_out, WORD32 npoints, const WORD32 tx_flag, iusace_scratch_mem *pstr_scratch) { FLOAT32 *ptr_scratch1 = pstr_scratch->p_fft_mdct_buf; const FLOAT64 *cos_ptr = NULL; const FLOAT64 *sin_ptr = NULL; WORD32 nlength = npoints >> 1; WORD32 n_total = npoints << 1; memset(ptr_scratch1, 0, ((SIZE_T)n_total << 1) * sizeof(*ptr_scratch1)); switch (npoints) { case (96): cos_ptr = iexheaac_pre_post_twid_cos_192; sin_ptr = iexheaac_pre_post_twid_sin_192; break; case (128): cos_ptr = iusace_pre_post_twid_cos_256; sin_ptr = iusace_pre_post_twid_sin_256; break; case (768): cos_ptr = iexheaac_pre_post_twid_cos_1536; sin_ptr = iexheaac_pre_post_twid_sin_1536; break; case (1024): cos_ptr = iusace_pre_post_twid_cos_2048; sin_ptr = iusace_pre_post_twid_sin_2048; break; default: return IA_EXHEAACE_EXE_FATAL_USAC_INVALID_WINDOW_LENGTH; } /* pre-twiddle */ iusace_calc_pre_twid_enc(ptr_in, ptr_scratch1, npoints << 1, cos_ptr, sin_ptr, tx_flag); /* complex FFT */ iusace_complex_fft(ptr_scratch1, nlength, pstr_scratch); /* post-twiddle */ iusace_calc_post_twid_enc(ptr_out, ptr_scratch1, npoints << 1, cos_ptr, sin_ptr, tx_flag); return IA_NO_ERROR; } VOID iusace_complex_fft_2048(FLOAT32 *ptr_x, FLOAT32 *scratch_fft) { WORD32 i; FLOAT32 re, im, c_v, s_v, 
tmp_re, tmp_im; FLOAT32 *ptr_re, *ptr_im, *ptr_re_h, *ptr_im_h; FLOAT32 *ptr_cos_val, *ptr_sin_val; iusace_complex_fft_p2(ptr_x, 1024, scratch_fft); iusace_complex_fft_p2(ptr_x + 2048, 1024, scratch_fft); ptr_re = ptr_x; ptr_im = ptr_x + 1; ptr_re_h = ptr_x + 2048; ptr_im_h = ptr_x + 2048 + 1; ptr_cos_val = (FLOAT32 *)&iusace_twiddle_cos_2048[0]; ptr_sin_val = (FLOAT32 *)&iusace_twiddle_sin_2048[0]; for (i = 0; i < 1024; i++) { re = *ptr_re_h; im = *ptr_im_h; c_v = ptr_cos_val[i]; s_v = ptr_sin_val[i]; tmp_re = (re * c_v) + (im * s_v); tmp_im = -(re * s_v) + (im * c_v); re = *ptr_re; im = *ptr_im; *ptr_re = re + tmp_re; *ptr_im = im + tmp_im; *ptr_re_h = re - tmp_re; *ptr_im_h = im - tmp_im; ptr_re += 2; ptr_im += 2; ptr_re_h += 2; ptr_im_h += 2; } } static VOID ixheaace_rad2_cplx_fft(FLOAT32 *ptr_real, FLOAT32 *ptr_imag, WORD32 n_points, FLOAT32 *ptr_scratch) { WORD32 i, j, k, n_stages, h2; FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; WORD32 del, nodespacing, in_loop_cnt; WORD32 not_power_4; WORD32 dig_rev_shift; WORD32 m_points = n_points; FLOAT32 *ptr_x = ptr_scratch; FLOAT32 *y = ptr_scratch + 2048; FLOAT32 *ptr_y = y; const FLOAT32 *ptr_w; dig_rev_shift = ixheaac_norm32(m_points) + 1 - 16; n_stages = 30 - ixheaac_norm32(m_points); not_power_4 = n_stages & 1; n_stages = n_stages >> 1; ptr_w = ia_fft_twiddle_table_float; for (i = 0; i < n_points; i++) { ptr_x[2 * i] = ptr_real[i]; ptr_x[2 * i + 1] = ptr_imag[i]; } dig_rev_shift = max(dig_rev_shift, 0); for (i = 0; i < n_points; i += 4) { FLOAT32 *inp = ptr_x; FLOAT32 tmk; DIG_REV(i, dig_rev_shift, h2); if (not_power_4) { h2 += 1; h2 &= ~1; } inp += (h2); x0r = *inp; x0i = *(inp + 1); inp += (n_points >> 1); x1r = *inp; x1i = *(inp + 1); inp += (n_points >> 1); x2r = *inp; x2i = *(inp + 1); inp += (n_points >> 1); x3r = *inp; x3i = *(inp + 1); x0r = ia_add_flt(x0r, x2r); x0i = ia_add_flt(x0i, x2i); tmk = ia_sub_flt(x0r, x2r); x2r = ia_sub_flt(tmk, x2r); tmk = ia_sub_flt(x0i, x2i); x2i = ia_sub_flt(tmk, x2i); x1r = ia_add_flt(x1r, x3r); x1i = ia_add_flt(x1i, x3i); tmk = ia_sub_flt(x1r, x3r); x3r = ia_sub_flt(tmk, x3r); tmk = ia_sub_flt(x1i, x3i); x3i = ia_sub_flt(tmk, x3i); x0r = ia_add_flt(x0r, x1r); x0i = ia_add_flt(x0i, x1i); tmk = ia_sub_flt(x0r, x1r); x1r = ia_sub_flt(tmk, x1r); tmk = ia_sub_flt(x0i, x1i); x1i = ia_sub_flt(tmk, x1i); x2r = ia_add_flt(x2r, x3i); x2i = ia_sub_flt(x2i, x3r); tmk = ia_sub_flt(x2r, x3i); x3i = ia_sub_flt(tmk, x3i); tmk = ia_add_flt(x2i, x3r); x3r = ia_add_flt(tmk, x3r); *ptr_y++ = x0r; *ptr_y++ = x0i; *ptr_y++ = x2r; *ptr_y++ = x2i; *ptr_y++ = x1r; *ptr_y++ = x1i; *ptr_y++ = x3i; *ptr_y++ = x3r; } ptr_y -= 2 * n_points; del = 4; nodespacing = 64; in_loop_cnt = n_points >> 4; for (i = n_stages - 1; i > 0; i--) { const FLOAT32 *twiddles = ptr_w; FLOAT32 *data = ptr_y; FLOAT32 w_1, w_2, w_3, w_4, w_5, w_6; WORD32 sec_loop_cnt; for (k = in_loop_cnt; k != 0; k--) { x0r = (*data); x0i = (*(data + 1)); data += ((SIZE_T)del << 1); x1r = (*data); x1i = (*(data + 1)); data += ((SIZE_T)del << 1); x2r = (*data); x2i = (*(data + 1)); data += ((SIZE_T)del << 1); x3r = (*data); x3i = (*(data + 1)); data -= 3 * (del << 1); x0r = ia_add_flt(x0r, x2r); x0i = ia_add_flt(x0i, x2i); x2r = ia_msu_flt(x0r, x2r, 2); x2i = ia_msu_flt(x0i, x2i, 2); x1r = ia_add_flt(x1r, x3r); x1i = ia_add_flt(x1i, x3i); x3r = ia_msu_flt(x1r, x3r, 2); x3i = ia_msu_flt(x1i, x3i, 2); x0r = ia_add_flt(x0r, x1r); x0i = ia_add_flt(x0i, x1i); x1r = ia_msu_flt(x0r, x1r, 2); x1i = ia_msu_flt(x0i, x1i, 2); x2r = ia_add_flt(x2r, x3i); x2i = 
ia_sub_flt(x2i, x3r); x3i = ia_msu_flt(x2r, x3i, 2); x3r = ia_mac_flt(x2i, x3r, 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data = ptr_y + 2; sec_loop_cnt = (nodespacing * del); sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - (sec_loop_cnt / 256); for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1)); w_5 = *(twiddles + ((SIZE_T)j << 1) + 257); w_3 = *(twiddles + j + ((SIZE_T)j << 1)); w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 257); for (k = in_loop_cnt; k != 0; k--) { FLOAT32 tmp; /*x0 is loaded later to avoid register crunch*/ data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4)); x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1); x1r = tmp; tmp = ia_sub_flt(ia_mul_flt(x2r, w_2), ia_mul_flt(x2i, w_5)); x2i = ia_mac_flt(ia_mul_flt(x2r, w_5), x2i, w_2); x2r = tmp; tmp = ia_sub_flt(ia_mul_flt(x3r, w_3), ia_mul_flt(x3i, w_6)); x3i = ia_mac_flt(ia_mul_flt(x3r, w_6), x3i, w_3); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = ia_add_flt(x0r, (x2r)); x0i = ia_add_flt(x0i, (x2i)); x2r = ia_msu_flt(x0r, x2r, 2); x2i = ia_msu_flt(x0i, x2i, 2); x1r = ia_add_flt(x1r, x3r); x1i = ia_add_flt(x1i, x3i); x3r = ia_msu_flt(x1r, x3r, 2); x3i = ia_msu_flt(x1i, x3i, 2); x0r = ia_add_flt(x0r, (x1r)); x0i = ia_add_flt(x0i, (x1i)); x1r = ia_msu_flt(x0r, x1r, 2); x1i = ia_msu_flt(x0i, x1i, 2); x2r = ia_add_flt(x2r, (x3i)); x2i = ia_sub_flt(x2i, (x3r)); x3i = ia_msu_flt(x2r, x3i, 2); x3r = ia_mac_flt(x2i, x3r, 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * n_points; data += 2; } for (; j <= (nodespacing * del) >> 1; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1)); w_5 = *(twiddles + ((SIZE_T)j << 1) + 257); w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); for (k = in_loop_cnt; k != 0; k--) { FLOAT32 tmp; /*x0 is loaded later to avoid register crunch*/ data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4)); x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1); x1r = tmp; tmp = ia_sub_flt(ia_mul_flt(x2r, w_2), ia_mul_flt(x2i, w_5)); x2i = ia_mac_flt(ia_mul_flt(x2r, w_5), x2i, w_2); x2r = tmp; tmp = ia_add_flt(ia_mul_flt(x3r, w_6), ia_mul_flt(x3i, w_3)); x3i = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6)); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = ia_add_flt(x0r, (x2r)); x0i = ia_add_flt(x0i, (x2i)); x2r = ia_msu_flt(x0r, x2r, 2); x2i = ia_msu_flt(x0i, x2i, 2); x1r = ia_add_flt(x1r, x3r); x1i = ia_add_flt(x1i, x3i); x3r = ia_msu_flt(x1r, x3r, 2); x3i = ia_msu_flt(x1i, 
x3i, 2); x0r = ia_add_flt(x0r, (x1r)); x0i = ia_add_flt(x0i, (x1i)); x1r = ia_msu_flt(x0r, x1r, 2); x1i = ia_msu_flt(x0i, x1i, 2); x2r = ia_add_flt(x2r, (x3i)); x2i = ia_sub_flt(x2i, (x3r)); x3i = ia_msu_flt(x2r, x3i, 2); x3r = ia_mac_flt(x2i, x3r, 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * n_points; data += 2; } for (; j <= sec_loop_cnt * 2; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1) - 256); w_5 = *(twiddles + ((SIZE_T)j << 1) + 1); w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256); w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1); for (k = in_loop_cnt; k != 0; k--) { FLOAT32 tmp; /*x0 is loaded later to avoid register crunch*/ data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4)); x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1); x1r = tmp; tmp = ia_add_flt(ia_mul_flt(x2r, w_5), ia_mul_flt(x2i, w_2)); x2i = ia_add_flt(ia_negate_flt(ia_mul_flt(x2r, w_2)), ia_mul_flt(x2i, w_5)); x2r = tmp; tmp = ia_add_flt(ia_mul_flt(x3r, w_6), ia_mul_flt(x3i, w_3)); x3i = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6)); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = ia_add_flt(x0r, (x2r)); x0i = ia_add_flt(x0i, (x2i)); x2r = ia_msu_flt(x0r, x2r, 2); x2i = ia_msu_flt(x0i, x2i, 2); x1r = ia_add_flt(x1r, x3r); x1i = ia_add_flt(x1i, x3i); x3r = ia_msu_flt(x1r, x3r, 2); x3i = ia_msu_flt(x1i, x3i, 2); x0r = ia_add_flt(x0r, (x1r)); x0i = ia_add_flt(x0i, (x1i)); x1r = ia_msu_flt(x0r, x1r, 2); x1i = ia_msu_flt(x0i, x1i, 2); x2r = ia_add_flt(x2r, (x3i)); x2i = ia_sub_flt(x2i, (x3r)); x3i = ia_msu_flt(x2r, x3i, 2); x3r = ia_mac_flt(x2i, x3r, 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * n_points; data += 2; } for (; j < nodespacing * del; j += nodespacing) { w_1 = *(twiddles + j); w_4 = *(twiddles + j + 257); w_2 = *(twiddles + ((SIZE_T)j << 1) - 256); w_5 = *(twiddles + ((SIZE_T)j << 1) + 1); w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 512); w_6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257); for (k = in_loop_cnt; k != 0; k--) { FLOAT32 tmp; /*x0 is loaded later to avoid register crunch*/ data += ((SIZE_T)del << 1); x1r = *data; x1i = *(data + 1); data += ((SIZE_T)del << 1); x2r = *data; x2i = *(data + 1); data += ((SIZE_T)del << 1); x3r = *data; x3i = *(data + 1); data -= 3 * (del << 1); tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4)); x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1); x1r = tmp; tmp = ia_add_flt(ia_mul_flt(x2r, w_5), ia_mul_flt(x2i, w_2)); x2i = ia_add_flt(ia_negate_flt(ia_mul_flt(x2r, w_2)), ia_mul_flt(x2i, w_5)); x2r = tmp; tmp = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6)); x3i = ia_mac_flt(ia_mul_flt(x3r, w_6), x3i, w_3); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); x0r = ia_add_flt(x0r, (x2r)); x0i = ia_add_flt(x0i, (x2i)); x2r = ia_msu_flt(x0r, x2r, 2); x2i = ia_msu_flt(x0i, x2i, 2); x1r = ia_add_flt(x1r, x3r); x1i = ia_sub_flt(x1i, x3i); x3r 
= ia_msu_flt(x1r, x3r, 2); x3i = ia_mac_flt(x1i, x3i, 2); x0r = ia_add_flt(x0r, (x1r)); x0i = ia_add_flt(x0i, (x1i)); x1r = ia_msu_flt(x0r, x1r, 2); x1i = ia_msu_flt(x0i, x1i, 2); x2r = ia_add_flt(x2r, (x3i)); x2i = ia_sub_flt(x2i, (x3r)); x3i = ia_msu_flt(x2r, x3i, 2); x3r = ia_mac_flt(x2i, x3r, 2); *data = x0r; *(data + 1) = x0i; data += ((SIZE_T)del << 1); *data = x2r; *(data + 1) = x2i; data += ((SIZE_T)del << 1); *data = x1r; *(data + 1) = x1i; data += ((SIZE_T)del << 1); *data = x3i; *(data + 1) = x3r; data += ((SIZE_T)del << 1); } data -= 2 * n_points; data += 2; } nodespacing >>= 2; del <<= 2; in_loop_cnt >>= 2; } if (not_power_4) { const FLOAT32 *twiddles = ptr_w; nodespacing <<= 1; for (j = del / 2; j != 0; j--) { FLOAT32 w_1 = *twiddles; FLOAT32 w_4 = *(twiddles + 257); FLOAT32 tmp; twiddles += nodespacing; x0r = *ptr_y; x0i = *(ptr_y + 1); ptr_y += ((SIZE_T)del << 1); x1r = *ptr_y; x1i = *(ptr_y + 1); tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4)); x1i = (FLOAT32)ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1); x1r = tmp; *ptr_y = ia_sub_flt((x0r), (x1r)); *(ptr_y + 1) = ia_sub_flt((x0i), (x1i)); ptr_y -= ((SIZE_T)del << 1); *ptr_y = ia_add_flt((x0r), (x1r)); *(ptr_y + 1) = ia_add_flt((x0i), (x1i)); ptr_y += 2; } twiddles = ptr_w; for (j = del / 2; j != 0; j--) { FLOAT32 w_1 = *twiddles; FLOAT32 w_4 = *(twiddles + 257); FLOAT32 tmp; twiddles += nodespacing; x0r = *ptr_y; x0i = *(ptr_y + 1); ptr_y += ((SIZE_T)del << 1); x1r = *ptr_y; x1i = *(ptr_y + 1); tmp = ia_add_flt(ia_mul_flt(x1r, w_4), ia_mul_flt(x1i, w_1)); x1i = ia_add_flt(ia_negate_flt(ia_mul_flt(x1r, w_1)), ia_mul_flt(x1i, w_4)); x1r = tmp; *ptr_y = ia_sub_flt((x0r), (x1r)); *(ptr_y + 1) = ia_sub_flt((x0i), (x1i)); ptr_y -= ((SIZE_T)del << 1); *ptr_y = ia_add_flt((x0r), (x1r)); *(ptr_y + 1) = ia_add_flt((x0i), (x1i)); ptr_y += 2; } } for (i = 0; i < n_points; i++) { ptr_real[i] = y[2 * i]; ptr_imag[i] = y[2 * i + 1]; } } static VOID ixheaace_cplx_fft_4(FLOAT32 *x_r, FLOAT32 *x_i) { FLOAT32 x_0, x_1, x_2, x_3; FLOAT32 x_4, x_5, x_6, x_7; FLOAT32 x0r, x1r, x2r, x3r; FLOAT32 x0i, x1i, x2i, x3i; // 4 Point FFT x_0 = x_r[0]; x_1 = x_i[0]; x_2 = x_r[1]; x_3 = x_i[1]; x_4 = x_r[2]; x_5 = x_i[2]; x_6 = x_r[3]; x_7 = x_i[3]; x0r = ia_add_flt(x_0, x_4); x0i = ia_add_flt(x_1, x_5); x2r = ia_sub_flt(x_0, x_4); x2i = ia_sub_flt(x_1, x_5); x1r = ia_add_flt(x_2, x_6); x1i = ia_add_flt(x_3, x_7); x3r = ia_sub_flt(x_2, x_6); x3i = ia_sub_flt(x_3, x_7); x_r[0] = ia_add_flt(x0r, x1r); x_i[0] = ia_add_flt(x0i, x1i); x_r[2] = ia_sub_flt(x0r, x1r); x_i[2] = ia_sub_flt(x0i, x1i); x_r[1] = ia_add_flt(x2r, x3i); x_i[1] = ia_sub_flt(x2i, x3r); x_r[3] = ia_sub_flt(x2r, x3i); x_i[3] = ia_add_flt(x2i, x3r); return; } VOID iusace_complex_fft_4096(FLOAT32 *ptr_x_r, FLOAT32 *ptr_x_i, FLOAT32 *ptr_scratch_buf) { FLOAT32 *ptr_data_r; FLOAT32 *ptr_data_i; WORD32 fft_len = 4096; FLOAT32 *ptr_fft_interim_buf = &ptr_scratch_buf[2 * fft_len]; WORD32 i, j; WORD32 dim2 = fft_len >> 10; WORD32 dim1 = fft_len / dim2; WORD32 fac = 4; for (i = 0; i < dim2; i++) { ptr_data_r = &ptr_scratch_buf[(2 * i + 0) * dim1]; ptr_data_i = &ptr_scratch_buf[(2 * i + 1) * dim1]; for (j = 0; j < dim1; j++) { ptr_data_r[j] = ptr_x_r[(dim2 * j + i)]; ptr_data_i[j] = 0; } ixheaace_rad2_cplx_fft(ptr_data_r, ptr_data_i, dim1, ptr_fft_interim_buf); } ptr_data_r = &ptr_scratch_buf[0]; ptr_data_i = &ptr_scratch_buf[0]; for (i = 0; i < dim1; i++) { FLOAT32 *ptr_cos_val = (FLOAT32 *)&ia_mixed_rad_twiddle_cos[i * dim2 * fac]; FLOAT32 *ptr_sin_val = (FLOAT32 
*)&ia_mixed_rad_twiddle_sin[i * dim2 * fac];
    for (j = 0; j < dim2; j++) {
      FLOAT32 real = ptr_data_r[(2 * j + 0) * dim1 + i];
      FLOAT32 imag = ptr_data_i[(2 * j + 1) * dim1 + i];
      FLOAT32 cos_val = ptr_cos_val[j * fac];
      FLOAT32 sin_val = ptr_sin_val[j * fac];
      FLOAT32 temp_real = (FLOAT32)(real * cos_val + imag * sin_val);
      FLOAT32 temp_imag = (FLOAT32)(imag * cos_val - real * sin_val);
      ptr_fft_interim_buf[(2 * i + 0) * dim2 + j] = temp_real;
      ptr_fft_interim_buf[(2 * i + 1) * dim2 + j] = temp_imag;
    }
  }

  for (i = 0; i < dim1; i++) {
    ptr_data_r = &ptr_fft_interim_buf[(2 * i + 0) * dim2];
    ptr_data_i = &ptr_fft_interim_buf[(2 * i + 1) * dim2];
    ixheaace_cplx_fft_4(ptr_data_r, ptr_data_i);
  }

  ptr_data_r = &ptr_fft_interim_buf[0];
  ptr_data_i = &ptr_fft_interim_buf[0];

  for (i = 0; i < dim1; i++) {
    for (j = 0; j < dim2; j++) {
      ptr_x_r[(j * dim1 + i)] = ptr_data_r[(2 * i + 0) * dim2 + j];
      ptr_x_i[(j * dim1 + i)] = ptr_data_i[(2 * i + 1) * dim2 + j];
    }
  }
}
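
/*
 * Illustrative usage sketch (not part of the library): driving the FFT-based
 * MDCT above for a 1024-point transform. Buffer sizes follow the indexing in
 * iusace_calc_pre_twid_enc() / iusace_calc_post_twid_enc(), which read and
 * write 2 * npoints values. How the scratch memory is obtained is an
 * assumption here; in the encoder it is owned by the USAC scratch structures,
 * and get_scratch() below is a hypothetical helper used only for the sketch.
 *
 *   FLOAT64 time_in[2048];   // windowed time-domain input, 2 * npoints
 *   FLOAT64 spec_out[2048];  // transform output, 2 * npoints
 *   iusace_scratch_mem *pstr_scratch = get_scratch();  // hypothetical helper
 *
 *   IA_ERRORCODE err =
 *       iusace_fft_based_mdct(time_in, spec_out, 1024, 1, pstr_scratch);
 *   if (err != IA_NO_ERROR) {
 *     // only npoints of 96, 128, 768 or 1024 are supported
 *   }
 *
 * Passing tx_flag = 0 additionally applies the spectral-reversal / sign-flip
 * handling seen in the pre- and post-twiddle helpers above.
 */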