19a19cd78SMatthias Ringwald /****************************************************************************** 29a19cd78SMatthias Ringwald * 3*4930cef6SMatthias Ringwald * Copyright 2022 Google LLC 49a19cd78SMatthias Ringwald * 59a19cd78SMatthias Ringwald * Licensed under the Apache License, Version 2.0 (the "License"); 69a19cd78SMatthias Ringwald * you may not use this file except in compliance with the License. 79a19cd78SMatthias Ringwald * You may obtain a copy of the License at: 89a19cd78SMatthias Ringwald * 99a19cd78SMatthias Ringwald * http://www.apache.org/licenses/LICENSE-2.0 109a19cd78SMatthias Ringwald * 119a19cd78SMatthias Ringwald * Unless required by applicable law or agreed to in writing, software 129a19cd78SMatthias Ringwald * distributed under the License is distributed on an "AS IS" BASIS, 139a19cd78SMatthias Ringwald * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 149a19cd78SMatthias Ringwald * See the License for the specific language governing permissions and 159a19cd78SMatthias Ringwald * limitations under the License. 169a19cd78SMatthias Ringwald * 179a19cd78SMatthias Ringwald ******************************************************************************/ 189a19cd78SMatthias Ringwald 199a19cd78SMatthias Ringwald #include "tables.h" 209a19cd78SMatthias Ringwald 21*4930cef6SMatthias Ringwald #include "mdct_neon.h" 22*4930cef6SMatthias Ringwald 239a19cd78SMatthias Ringwald 249a19cd78SMatthias Ringwald /* ---------------------------------------------------------------------------- 259a19cd78SMatthias Ringwald * FFT processing 269a19cd78SMatthias Ringwald * -------------------------------------------------------------------------- */ 279a19cd78SMatthias Ringwald 289a19cd78SMatthias Ringwald /** 29*4930cef6SMatthias Ringwald * FFT 5 Points 309a19cd78SMatthias Ringwald * x, y Input and output coefficients, of size 5xn 31*4930cef6SMatthias Ringwald * n Number of interleaved transform to perform (n % 2 = 0) 329a19cd78SMatthias Ringwald */ 33*4930cef6SMatthias Ringwald #ifndef fft_5 34*4930cef6SMatthias Ringwald LC3_HOT static inline void fft_5( 359a19cd78SMatthias Ringwald const struct lc3_complex *x, struct lc3_complex *y, int n) 369a19cd78SMatthias Ringwald { 379a19cd78SMatthias Ringwald static const float cos1 = 0.3090169944; /* cos(-2Pi 1/5) */ 389a19cd78SMatthias Ringwald static const float cos2 = -0.8090169944; /* cos(-2Pi 2/5) */ 399a19cd78SMatthias Ringwald 409a19cd78SMatthias Ringwald static const float sin1 = -0.9510565163; /* sin(-2Pi 1/5) */ 419a19cd78SMatthias Ringwald static const float sin2 = -0.5877852523; /* sin(-2Pi 2/5) */ 429a19cd78SMatthias Ringwald 439a19cd78SMatthias Ringwald for (int i = 0; i < n; i++, x++, y+= 5) { 449a19cd78SMatthias Ringwald 459a19cd78SMatthias Ringwald struct lc3_complex s14 = 469a19cd78SMatthias Ringwald { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im }; 479a19cd78SMatthias Ringwald struct lc3_complex d14 = 489a19cd78SMatthias Ringwald { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im }; 499a19cd78SMatthias Ringwald 509a19cd78SMatthias Ringwald struct lc3_complex s23 = 519a19cd78SMatthias Ringwald { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im }; 529a19cd78SMatthias Ringwald struct lc3_complex d23 = 539a19cd78SMatthias Ringwald { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im }; 549a19cd78SMatthias Ringwald 559a19cd78SMatthias Ringwald y[0].re = x[0].re + s14.re + s23.re; 56*4930cef6SMatthias Ringwald 579a19cd78SMatthias Ringwald y[0].im = x[0].im + s14.im + s23.im; 589a19cd78SMatthias Ringwald 59*4930cef6SMatthias Ringwald y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1 60*4930cef6SMatthias Ringwald + s23.re * cos2 - d23.im * sin2; 619a19cd78SMatthias Ringwald 62*4930cef6SMatthias Ringwald y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1 63*4930cef6SMatthias Ringwald + s23.im * cos2 + d23.re * sin2; 649a19cd78SMatthias Ringwald 65*4930cef6SMatthias Ringwald y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2 66*4930cef6SMatthias Ringwald + s23.re * cos1 + d23.im * sin1; 679a19cd78SMatthias Ringwald 68*4930cef6SMatthias Ringwald y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2 69*4930cef6SMatthias Ringwald + s23.im * cos1 - d23.re * sin1; 709a19cd78SMatthias Ringwald 71*4930cef6SMatthias Ringwald y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2 72*4930cef6SMatthias Ringwald + s23.re * cos1 - d23.im * sin1; 739a19cd78SMatthias Ringwald 74*4930cef6SMatthias Ringwald y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2 75*4930cef6SMatthias Ringwald + s23.im * cos1 + d23.re * sin1; 769a19cd78SMatthias Ringwald 77*4930cef6SMatthias Ringwald y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1 78*4930cef6SMatthias Ringwald + s23.re * cos2 + d23.im * sin2; 799a19cd78SMatthias Ringwald 80*4930cef6SMatthias Ringwald y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1 81*4930cef6SMatthias Ringwald + s23.im * cos2 - d23.re * sin2; 829a19cd78SMatthias Ringwald } 839a19cd78SMatthias Ringwald } 84*4930cef6SMatthias Ringwald #endif /* fft_5 */ 859a19cd78SMatthias Ringwald 869a19cd78SMatthias Ringwald /** 87*4930cef6SMatthias Ringwald * FFT Butterfly 3 Points 889a19cd78SMatthias Ringwald * x, y Input and output coefficients 899a19cd78SMatthias Ringwald * twiddles Twiddles factors, determine size of transform 909a19cd78SMatthias Ringwald * n Number of interleaved transforms 919a19cd78SMatthias Ringwald */ 92*4930cef6SMatthias Ringwald #ifndef fft_bf3 93*4930cef6SMatthias Ringwald LC3_HOT static inline void fft_bf3( 94*4930cef6SMatthias Ringwald const struct lc3_fft_bf3_twiddles *twiddles, 959a19cd78SMatthias Ringwald const struct lc3_complex *x, struct lc3_complex *y, int n) 969a19cd78SMatthias Ringwald { 979a19cd78SMatthias Ringwald int n3 = twiddles->n3; 989a19cd78SMatthias Ringwald const struct lc3_complex (*w0)[2] = twiddles->t; 999a19cd78SMatthias Ringwald const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3; 1009a19cd78SMatthias Ringwald 1019a19cd78SMatthias Ringwald const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3; 1029a19cd78SMatthias Ringwald struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3; 1039a19cd78SMatthias Ringwald 104*4930cef6SMatthias Ringwald for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3) 1059a19cd78SMatthias Ringwald for (int j = 0; j < n3; j++, x0++, x1++, x2++) { 1069a19cd78SMatthias Ringwald 107*4930cef6SMatthias Ringwald y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im 108*4930cef6SMatthias Ringwald + x2->re * w0[j][1].re - x2->im * w0[j][1].im; 1099a19cd78SMatthias Ringwald 110*4930cef6SMatthias Ringwald y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im 111*4930cef6SMatthias Ringwald + x2->im * w0[j][1].re + x2->re * w0[j][1].im; 1129a19cd78SMatthias Ringwald 113*4930cef6SMatthias Ringwald y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im 114*4930cef6SMatthias Ringwald + x2->re * w1[j][1].re - x2->im * w1[j][1].im; 1159a19cd78SMatthias Ringwald 116*4930cef6SMatthias Ringwald y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im 117*4930cef6SMatthias Ringwald + x2->im * w1[j][1].re + x2->re * w1[j][1].im; 1189a19cd78SMatthias Ringwald 119*4930cef6SMatthias Ringwald y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im 120*4930cef6SMatthias Ringwald + x2->re * w2[j][1].re - x2->im * w2[j][1].im; 1219a19cd78SMatthias Ringwald 122*4930cef6SMatthias Ringwald y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im 123*4930cef6SMatthias Ringwald + x2->im * w2[j][1].re + x2->re * w2[j][1].im; 1249a19cd78SMatthias Ringwald } 1259a19cd78SMatthias Ringwald } 126*4930cef6SMatthias Ringwald #endif /* fft_bf3 */ 1279a19cd78SMatthias Ringwald 1289a19cd78SMatthias Ringwald /** 129*4930cef6SMatthias Ringwald * FFT Butterfly 2 Points 1309a19cd78SMatthias Ringwald * twiddles Twiddles factors, determine size of transform 1319a19cd78SMatthias Ringwald * x, y Input and output coefficients 1329a19cd78SMatthias Ringwald * n Number of interleaved transforms 1339a19cd78SMatthias Ringwald */ 134*4930cef6SMatthias Ringwald #ifndef fft_bf2 135*4930cef6SMatthias Ringwald LC3_HOT static inline void fft_bf2( 136*4930cef6SMatthias Ringwald const struct lc3_fft_bf2_twiddles *twiddles, 1379a19cd78SMatthias Ringwald const struct lc3_complex *x, struct lc3_complex *y, int n) 1389a19cd78SMatthias Ringwald { 1399a19cd78SMatthias Ringwald int n2 = twiddles->n2; 1409a19cd78SMatthias Ringwald const struct lc3_complex *w = twiddles->t; 1419a19cd78SMatthias Ringwald 1429a19cd78SMatthias Ringwald const struct lc3_complex *x0 = x, *x1 = x0 + n*n2; 1439a19cd78SMatthias Ringwald struct lc3_complex *y0 = y, *y1 = y0 + n2; 1449a19cd78SMatthias Ringwald 1459a19cd78SMatthias Ringwald for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) { 1469a19cd78SMatthias Ringwald 1479a19cd78SMatthias Ringwald for (int j = 0; j < n2; j++, x0++, x1++) { 1489a19cd78SMatthias Ringwald 149*4930cef6SMatthias Ringwald y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im; 150*4930cef6SMatthias Ringwald y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im; 1519a19cd78SMatthias Ringwald 152*4930cef6SMatthias Ringwald y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im; 153*4930cef6SMatthias Ringwald y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im; 1549a19cd78SMatthias Ringwald } 1559a19cd78SMatthias Ringwald } 1569a19cd78SMatthias Ringwald } 157*4930cef6SMatthias Ringwald #endif /* fft_bf2 */ 1589a19cd78SMatthias Ringwald 1599a19cd78SMatthias Ringwald /** 1609a19cd78SMatthias Ringwald * Perform FFT 1619a19cd78SMatthias Ringwald * x, y0, y1 Input, and 2 scratch buffers of size `n` 1629a19cd78SMatthias Ringwald * n Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240 1639a19cd78SMatthias Ringwald * return The buffer `y0` or `y1` that hold the result 1649a19cd78SMatthias Ringwald * 1659a19cd78SMatthias Ringwald * Input `x` can be the same as the `y0` second scratch buffer 1669a19cd78SMatthias Ringwald */ 167*4930cef6SMatthias Ringwald static struct lc3_complex *fft(const struct lc3_complex *x, int n, 1689a19cd78SMatthias Ringwald struct lc3_complex *y0, struct lc3_complex *y1) 1699a19cd78SMatthias Ringwald { 1709a19cd78SMatthias Ringwald struct lc3_complex *y[2] = { y1, y0 }; 1719a19cd78SMatthias Ringwald int i2, i3, is = 0; 1729a19cd78SMatthias Ringwald 1739a19cd78SMatthias Ringwald /* The number of points `n` can be decomposed as : 1749a19cd78SMatthias Ringwald * 1759a19cd78SMatthias Ringwald * n = 5^1 * 3^n3 * 2^n2 1769a19cd78SMatthias Ringwald * 1779a19cd78SMatthias Ringwald * for n = 40, 80, 160 n3 = 0, n2 = [3..5] 1789a19cd78SMatthias Ringwald * n = 30, 60, 120, 240 n3 = 1, n2 = [1..4] 1799a19cd78SMatthias Ringwald * n = 90, 180 n3 = 2, n2 = [1..2] 1809a19cd78SMatthias Ringwald * 1819a19cd78SMatthias Ringwald * Note that the expression `n & (n-1) == 0` is equivalent 1829a19cd78SMatthias Ringwald * to the check that `n` is a power of 2. */ 1839a19cd78SMatthias Ringwald 184*4930cef6SMatthias Ringwald fft_5(x, y[is], n /= 5); 1859a19cd78SMatthias Ringwald 1869a19cd78SMatthias Ringwald for (i3 = 0; n & (n-1); i3++, is ^= 1) 187*4930cef6SMatthias Ringwald fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3); 1889a19cd78SMatthias Ringwald 1899a19cd78SMatthias Ringwald for (i2 = 0; n > 1; i2++, is ^= 1) 190*4930cef6SMatthias Ringwald fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1); 1919a19cd78SMatthias Ringwald 1929a19cd78SMatthias Ringwald return y[is]; 1939a19cd78SMatthias Ringwald } 1949a19cd78SMatthias Ringwald 1959a19cd78SMatthias Ringwald 1969a19cd78SMatthias Ringwald /* ---------------------------------------------------------------------------- 1979a19cd78SMatthias Ringwald * MDCT processing 1989a19cd78SMatthias Ringwald * -------------------------------------------------------------------------- */ 1999a19cd78SMatthias Ringwald 2009a19cd78SMatthias Ringwald /** 2019a19cd78SMatthias Ringwald * Windowing of samples before MDCT 2029a19cd78SMatthias Ringwald * dt, sr Duration and samplerate (size of the transform) 203*4930cef6SMatthias Ringwald * x, y Input current and delayed samples 204*4930cef6SMatthias Ringwald * y, d Output windowed samples, and delayed ones 2059a19cd78SMatthias Ringwald */ 206*4930cef6SMatthias Ringwald LC3_HOT static void mdct_window(enum lc3_dt dt, enum lc3_srate sr, 207*4930cef6SMatthias Ringwald const float *x, float *d, float *y) 2089a19cd78SMatthias Ringwald { 2099a19cd78SMatthias Ringwald int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr); 2109a19cd78SMatthias Ringwald 2119a19cd78SMatthias Ringwald const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns; 2129a19cd78SMatthias Ringwald const float *w2 = w1, *w3 = w2 + nd; 2139a19cd78SMatthias Ringwald 214*4930cef6SMatthias Ringwald const float *x0 = x + ns-nd, *x1 = x0; 2159a19cd78SMatthias Ringwald float *y0 = y + ns/2, *y1 = y0; 216*4930cef6SMatthias Ringwald float *d0 = d, *d1 = d + nd; 2179a19cd78SMatthias Ringwald 218*4930cef6SMatthias Ringwald while (x1 > x) { 219*4930cef6SMatthias Ringwald *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1); 220*4930cef6SMatthias Ringwald *(y1++) = (*(d0++) = *(x0++)) * *(w2++); 2219a19cd78SMatthias Ringwald 222*4930cef6SMatthias Ringwald *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1); 223*4930cef6SMatthias Ringwald *(y1++) = (*(d0++) = *(x0++)) * *(w2++); 224*4930cef6SMatthias Ringwald } 2259a19cd78SMatthias Ringwald 226*4930cef6SMatthias Ringwald for (x1 += ns; x0 < x1; ) { 227*4930cef6SMatthias Ringwald *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1); 228*4930cef6SMatthias Ringwald *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3); 229*4930cef6SMatthias Ringwald 230*4930cef6SMatthias Ringwald *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1); 231*4930cef6SMatthias Ringwald *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3); 232*4930cef6SMatthias Ringwald } 2339a19cd78SMatthias Ringwald } 2349a19cd78SMatthias Ringwald 2359a19cd78SMatthias Ringwald /** 2369a19cd78SMatthias Ringwald * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT 2379a19cd78SMatthias Ringwald * def Size and twiddles factors 2389a19cd78SMatthias Ringwald * x, y Input and output coefficients 2399a19cd78SMatthias Ringwald * 2409a19cd78SMatthias Ringwald * `x` and y` can be the same buffer 2419a19cd78SMatthias Ringwald */ 242*4930cef6SMatthias Ringwald LC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def, 2439a19cd78SMatthias Ringwald const float *x, struct lc3_complex *y) 2449a19cd78SMatthias Ringwald { 2459a19cd78SMatthias Ringwald int n4 = def->n4; 2469a19cd78SMatthias Ringwald 2479a19cd78SMatthias Ringwald const float *x0 = x, *x1 = x0 + 2*n4; 2489a19cd78SMatthias Ringwald const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 2499a19cd78SMatthias Ringwald struct lc3_complex *y0 = y, *y1 = y0 + n4; 2509a19cd78SMatthias Ringwald 2519a19cd78SMatthias Ringwald while (x0 < x1) { 2529a19cd78SMatthias Ringwald struct lc3_complex u, uw = *(w0++); 2539a19cd78SMatthias Ringwald u.re = - *(--x1) * uw.re + *x0 * uw.im; 2549a19cd78SMatthias Ringwald u.im = *(x0++) * uw.re + *x1 * uw.im; 2559a19cd78SMatthias Ringwald 2569a19cd78SMatthias Ringwald struct lc3_complex v, vw = *(--w1); 2579a19cd78SMatthias Ringwald v.re = - *(--x1) * vw.im + *x0 * vw.re; 2589a19cd78SMatthias Ringwald v.im = - *(x0++) * vw.im - *x1 * vw.re; 2599a19cd78SMatthias Ringwald 2609a19cd78SMatthias Ringwald *(y0++) = u; 2619a19cd78SMatthias Ringwald *(--y1) = v; 2629a19cd78SMatthias Ringwald } 2639a19cd78SMatthias Ringwald } 2649a19cd78SMatthias Ringwald 2659a19cd78SMatthias Ringwald /** 2669a19cd78SMatthias Ringwald * Post-rotate FFT N/4 points coefficients, resulting MDCT N points 2679a19cd78SMatthias Ringwald * def Size and twiddles factors 2689a19cd78SMatthias Ringwald * x, y Input and output coefficients 2699a19cd78SMatthias Ringwald * scale Scale on output coefficients 2709a19cd78SMatthias Ringwald * 2719a19cd78SMatthias Ringwald * `x` and y` can be the same buffer 2729a19cd78SMatthias Ringwald */ 273*4930cef6SMatthias Ringwald LC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def, 2749a19cd78SMatthias Ringwald const struct lc3_complex *x, float *y, float scale) 2759a19cd78SMatthias Ringwald { 2769a19cd78SMatthias Ringwald int n4 = def->n4, n8 = n4 >> 1; 2779a19cd78SMatthias Ringwald 2789a19cd78SMatthias Ringwald const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1; 2799a19cd78SMatthias Ringwald const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1; 2809a19cd78SMatthias Ringwald 2819a19cd78SMatthias Ringwald float *y0 = y + n4, *y1 = y0; 2829a19cd78SMatthias Ringwald 2839a19cd78SMatthias Ringwald for ( ; y1 > y; x0++, x1--, w0++, w1--) { 2849a19cd78SMatthias Ringwald 2859a19cd78SMatthias Ringwald float u0 = (x0->im * w0->im + x0->re * w0->re) * scale; 2869a19cd78SMatthias Ringwald float u1 = (x1->re * w1->im - x1->im * w1->re) * scale; 2879a19cd78SMatthias Ringwald 2889a19cd78SMatthias Ringwald float v0 = (x0->re * w0->im - x0->im * w0->re) * scale; 2899a19cd78SMatthias Ringwald float v1 = (x1->im * w1->im + x1->re * w1->re) * scale; 2909a19cd78SMatthias Ringwald 2919a19cd78SMatthias Ringwald *(y0++) = u0; *(y0++) = u1; 2929a19cd78SMatthias Ringwald *(--y1) = v0; *(--y1) = v1; 2939a19cd78SMatthias Ringwald } 2949a19cd78SMatthias Ringwald } 2959a19cd78SMatthias Ringwald 2969a19cd78SMatthias Ringwald /** 2979a19cd78SMatthias Ringwald * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT 2989a19cd78SMatthias Ringwald * def Size and twiddles factors 2999a19cd78SMatthias Ringwald * x, y Input and output coefficients 3009a19cd78SMatthias Ringwald * 301*4930cef6SMatthias Ringwald * `x` and `y` can be the same buffer 302*4930cef6SMatthias Ringwald * The real and imaginary parts of `y` are swapped, 303*4930cef6SMatthias Ringwald * to operate on FFT instead of IFFT 3049a19cd78SMatthias Ringwald */ 305*4930cef6SMatthias Ringwald LC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def, 3069a19cd78SMatthias Ringwald const float *x, struct lc3_complex *y) 3079a19cd78SMatthias Ringwald { 3089a19cd78SMatthias Ringwald int n4 = def->n4; 3099a19cd78SMatthias Ringwald 3109a19cd78SMatthias Ringwald const float *x0 = x, *x1 = x0 + 2*n4; 3119a19cd78SMatthias Ringwald 3129a19cd78SMatthias Ringwald const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 3139a19cd78SMatthias Ringwald struct lc3_complex *y0 = y, *y1 = y0 + n4; 3149a19cd78SMatthias Ringwald 3159a19cd78SMatthias Ringwald while (x0 < x1) { 3169a19cd78SMatthias Ringwald float u0 = *(x0++), u1 = *(--x1); 3179a19cd78SMatthias Ringwald float v0 = *(x0++), v1 = *(--x1); 3189a19cd78SMatthias Ringwald struct lc3_complex uw = *(w0++), vw = *(--w1); 3199a19cd78SMatthias Ringwald 320*4930cef6SMatthias Ringwald (y0 )->re = - u0 * uw.re - u1 * uw.im; 321*4930cef6SMatthias Ringwald (y0++)->im = - u1 * uw.re + u0 * uw.im; 3229a19cd78SMatthias Ringwald 323*4930cef6SMatthias Ringwald (--y1)->re = - v1 * vw.re - v0 * vw.im; 324*4930cef6SMatthias Ringwald ( y1)->im = - v0 * vw.re + v1 * vw.im; 3259a19cd78SMatthias Ringwald } 3269a19cd78SMatthias Ringwald } 3279a19cd78SMatthias Ringwald 3289a19cd78SMatthias Ringwald /** 3299a19cd78SMatthias Ringwald * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points 3309a19cd78SMatthias Ringwald * def Size and twiddles factors 3319a19cd78SMatthias Ringwald * x, y Input and output coefficients 3329a19cd78SMatthias Ringwald * scale Scale on output coefficients 3339a19cd78SMatthias Ringwald * 3349a19cd78SMatthias Ringwald * `x` and y` can be the same buffer 335*4930cef6SMatthias Ringwald * The real and imaginary parts of `x` are swapped, 336*4930cef6SMatthias Ringwald * to operate on FFT instead of IFFT 3379a19cd78SMatthias Ringwald */ 338*4930cef6SMatthias Ringwald LC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def, 3399a19cd78SMatthias Ringwald const struct lc3_complex *x, float *y, float scale) 3409a19cd78SMatthias Ringwald { 3419a19cd78SMatthias Ringwald int n4 = def->n4; 3429a19cd78SMatthias Ringwald 3439a19cd78SMatthias Ringwald const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 3449a19cd78SMatthias Ringwald const struct lc3_complex *x0 = x, *x1 = x0 + n4; 3459a19cd78SMatthias Ringwald 3469a19cd78SMatthias Ringwald float *y0 = y, *y1 = y0 + 2*n4; 3479a19cd78SMatthias Ringwald 3489a19cd78SMatthias Ringwald while (x0 < x1) { 3499a19cd78SMatthias Ringwald struct lc3_complex uz = *(x0++), vz = *(--x1); 3509a19cd78SMatthias Ringwald struct lc3_complex uw = *(w0++), vw = *(--w1); 3519a19cd78SMatthias Ringwald 352*4930cef6SMatthias Ringwald *(y0++) = (uz.re * uw.im - uz.im * uw.re) * scale; 353*4930cef6SMatthias Ringwald *(--y1) = (uz.re * uw.re + uz.im * uw.im) * scale; 3549a19cd78SMatthias Ringwald 355*4930cef6SMatthias Ringwald *(--y1) = (vz.re * vw.im - vz.im * vw.re) * scale; 356*4930cef6SMatthias Ringwald *(y0++) = (vz.re * vw.re + vz.im * vw.im) * scale; 3579a19cd78SMatthias Ringwald } 3589a19cd78SMatthias Ringwald } 3599a19cd78SMatthias Ringwald 3609a19cd78SMatthias Ringwald /** 3619a19cd78SMatthias Ringwald * Apply windowing of samples 3629a19cd78SMatthias Ringwald * dt, sr Duration and samplerate 3639a19cd78SMatthias Ringwald * x, d Middle half of IMDCT coefficients and delayed samples 3649a19cd78SMatthias Ringwald * y, d Output samples and delayed ones 3659a19cd78SMatthias Ringwald */ 366*4930cef6SMatthias Ringwald LC3_HOT static void imdct_window(enum lc3_dt dt, enum lc3_srate sr, 3679a19cd78SMatthias Ringwald const float *x, float *d, float *y) 3689a19cd78SMatthias Ringwald { 3699a19cd78SMatthias Ringwald /* The full MDCT coefficients is given by symmetry : 3709a19cd78SMatthias Ringwald * T[ 0 .. n/4-1] = -half[n/4-1 .. 0 ] 3719a19cd78SMatthias Ringwald * T[ n/4 .. n/2-1] = half[0 .. n/4-1] 3729a19cd78SMatthias Ringwald * T[ n/2 .. 3n/4-1] = half[n/4 .. n/2-1] 3739a19cd78SMatthias Ringwald * T[3n/4 .. n-1] = half[n/2-1 .. n/4 ] */ 3749a19cd78SMatthias Ringwald 3759a19cd78SMatthias Ringwald int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr); 3769a19cd78SMatthias Ringwald const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0; 3779a19cd78SMatthias Ringwald 3789a19cd78SMatthias Ringwald const float *x0 = d + nd-n4, *x1 = x0; 3799a19cd78SMatthias Ringwald float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d; 3809a19cd78SMatthias Ringwald 3819a19cd78SMatthias Ringwald while (y0 > y) { 3829a19cd78SMatthias Ringwald *(--y0) = *(--x0) - *(x ) * *(w1++); 3839a19cd78SMatthias Ringwald *(y1++) = *(x1++) + *(x++) * *(--w0); 384*4930cef6SMatthias Ringwald 385*4930cef6SMatthias Ringwald *(--y0) = *(--x0) - *(x ) * *(w1++); 386*4930cef6SMatthias Ringwald *(y1++) = *(x1++) + *(x++) * *(--w0); 3879a19cd78SMatthias Ringwald } 3889a19cd78SMatthias Ringwald 3899a19cd78SMatthias Ringwald while (y1 < y + nd) { 3909a19cd78SMatthias Ringwald *(y1++) = *(x1++) + *(x++) * *(--w0); 391*4930cef6SMatthias Ringwald *(y1++) = *(x1++) + *(x++) * *(--w0); 3929a19cd78SMatthias Ringwald } 3939a19cd78SMatthias Ringwald 3949a19cd78SMatthias Ringwald while (y1 < y + 2*n4) { 3959a19cd78SMatthias Ringwald *(y1++) = *(x ) * *(--w0); 3969a19cd78SMatthias Ringwald *(--y2) = *(x++) * *(w2++); 397*4930cef6SMatthias Ringwald 398*4930cef6SMatthias Ringwald *(y1++) = *(x ) * *(--w0); 399*4930cef6SMatthias Ringwald *(--y2) = *(x++) * *(w2++); 4009a19cd78SMatthias Ringwald } 4019a19cd78SMatthias Ringwald 4029a19cd78SMatthias Ringwald while (y2 > y3) { 4039a19cd78SMatthias Ringwald *(y3++) = *(x ) * *(--w0); 4049a19cd78SMatthias Ringwald *(--y2) = *(x++) * *(w2++); 405*4930cef6SMatthias Ringwald 406*4930cef6SMatthias Ringwald *(y3++) = *(x ) * *(--w0); 407*4930cef6SMatthias Ringwald *(--y2) = *(x++) * *(w2++); 4089a19cd78SMatthias Ringwald } 4099a19cd78SMatthias Ringwald } 4109a19cd78SMatthias Ringwald 4119a19cd78SMatthias Ringwald /** 4129a19cd78SMatthias Ringwald * Forward MDCT transformation 4139a19cd78SMatthias Ringwald */ 4149a19cd78SMatthias Ringwald void lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr, 415*4930cef6SMatthias Ringwald enum lc3_srate sr_dst, const float *x, float *d, float *y) 4169a19cd78SMatthias Ringwald { 4179a19cd78SMatthias Ringwald const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 4189a19cd78SMatthias Ringwald int nf = LC3_NS(dt, sr_dst); 4199a19cd78SMatthias Ringwald int ns = LC3_NS(dt, sr); 4209a19cd78SMatthias Ringwald 421*4930cef6SMatthias Ringwald struct lc3_complex buffer[ns/2]; 422*4930cef6SMatthias Ringwald struct lc3_complex *z = (struct lc3_complex *)y; 423*4930cef6SMatthias Ringwald union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 4249a19cd78SMatthias Ringwald 425*4930cef6SMatthias Ringwald mdct_window(dt, sr, x, d, u.f); 4269a19cd78SMatthias Ringwald 4279a19cd78SMatthias Ringwald mdct_pre_fft(rot, u.f, u.z); 428*4930cef6SMatthias Ringwald u.z = fft(u.z, ns/2, u.z, z); 4299a19cd78SMatthias Ringwald mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) )); 4309a19cd78SMatthias Ringwald } 4319a19cd78SMatthias Ringwald 4329a19cd78SMatthias Ringwald /** 4339a19cd78SMatthias Ringwald * Inverse MDCT transformation 4349a19cd78SMatthias Ringwald */ 4359a19cd78SMatthias Ringwald void lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr, 4369a19cd78SMatthias Ringwald enum lc3_srate sr_src, const float *x, float *d, float *y) 4379a19cd78SMatthias Ringwald { 4389a19cd78SMatthias Ringwald const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 4399a19cd78SMatthias Ringwald int nf = LC3_NS(dt, sr_src); 4409a19cd78SMatthias Ringwald int ns = LC3_NS(dt, sr); 4419a19cd78SMatthias Ringwald 4429a19cd78SMatthias Ringwald struct lc3_complex buffer[ns/2]; 4439a19cd78SMatthias Ringwald struct lc3_complex *z = (struct lc3_complex *)y; 4449a19cd78SMatthias Ringwald union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 4459a19cd78SMatthias Ringwald 4469a19cd78SMatthias Ringwald imdct_pre_fft(rot, x, z); 447*4930cef6SMatthias Ringwald z = fft(z, ns/2, z, u.z); 4489a19cd78SMatthias Ringwald imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf)); 4499a19cd78SMatthias Ringwald 4509a19cd78SMatthias Ringwald imdct_window(dt, sr, u.f, d, y); 4519a19cd78SMatthias Ringwald } 452