xref: /btstack/3rd-party/lc3-google/src/mdct.c (revision 4930cef6e21e6da2d7571b9259c7f0fb8bed3d01)
19a19cd78SMatthias Ringwald /******************************************************************************
29a19cd78SMatthias Ringwald  *
3*4930cef6SMatthias Ringwald  *  Copyright 2022 Google LLC
49a19cd78SMatthias Ringwald  *
59a19cd78SMatthias Ringwald  *  Licensed under the Apache License, Version 2.0 (the "License");
69a19cd78SMatthias Ringwald  *  you may not use this file except in compliance with the License.
79a19cd78SMatthias Ringwald  *  You may obtain a copy of the License at:
89a19cd78SMatthias Ringwald  *
99a19cd78SMatthias Ringwald  *  http://www.apache.org/licenses/LICENSE-2.0
109a19cd78SMatthias Ringwald  *
119a19cd78SMatthias Ringwald  *  Unless required by applicable law or agreed to in writing, software
129a19cd78SMatthias Ringwald  *  distributed under the License is distributed on an "AS IS" BASIS,
139a19cd78SMatthias Ringwald  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
149a19cd78SMatthias Ringwald  *  See the License for the specific language governing permissions and
159a19cd78SMatthias Ringwald  *  limitations under the License.
169a19cd78SMatthias Ringwald  *
179a19cd78SMatthias Ringwald  ******************************************************************************/
189a19cd78SMatthias Ringwald 
199a19cd78SMatthias Ringwald #include "tables.h"
209a19cd78SMatthias Ringwald 
21*4930cef6SMatthias Ringwald #include "mdct_neon.h"
22*4930cef6SMatthias Ringwald 
239a19cd78SMatthias Ringwald 
249a19cd78SMatthias Ringwald /* ----------------------------------------------------------------------------
259a19cd78SMatthias Ringwald  *  FFT processing
269a19cd78SMatthias Ringwald  * -------------------------------------------------------------------------- */
279a19cd78SMatthias Ringwald 
289a19cd78SMatthias Ringwald /**
29*4930cef6SMatthias Ringwald  * FFT 5 Points
309a19cd78SMatthias Ringwald  * x, y            Input and output coefficients, of size 5xn
31*4930cef6SMatthias Ringwald  * n               Number of interleaved transform to perform (n % 2 = 0)
329a19cd78SMatthias Ringwald  */
33*4930cef6SMatthias Ringwald #ifndef fft_5
34*4930cef6SMatthias Ringwald LC3_HOT static inline void fft_5(
359a19cd78SMatthias Ringwald     const struct lc3_complex *x, struct lc3_complex *y, int n)
369a19cd78SMatthias Ringwald {
379a19cd78SMatthias Ringwald     static const float cos1 =  0.3090169944;  /* cos(-2Pi 1/5) */
389a19cd78SMatthias Ringwald     static const float cos2 = -0.8090169944;  /* cos(-2Pi 2/5) */
399a19cd78SMatthias Ringwald 
409a19cd78SMatthias Ringwald     static const float sin1 = -0.9510565163;  /* sin(-2Pi 1/5) */
419a19cd78SMatthias Ringwald     static const float sin2 = -0.5877852523;  /* sin(-2Pi 2/5) */
429a19cd78SMatthias Ringwald 
439a19cd78SMatthias Ringwald     for (int i = 0; i < n; i++, x++, y+= 5) {
449a19cd78SMatthias Ringwald 
459a19cd78SMatthias Ringwald         struct lc3_complex s14 =
469a19cd78SMatthias Ringwald             { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im };
479a19cd78SMatthias Ringwald         struct lc3_complex d14 =
489a19cd78SMatthias Ringwald             { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im };
499a19cd78SMatthias Ringwald 
509a19cd78SMatthias Ringwald         struct lc3_complex s23 =
519a19cd78SMatthias Ringwald             { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im };
529a19cd78SMatthias Ringwald         struct lc3_complex d23 =
539a19cd78SMatthias Ringwald             { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im };
549a19cd78SMatthias Ringwald 
559a19cd78SMatthias Ringwald         y[0].re = x[0].re + s14.re + s23.re;
56*4930cef6SMatthias Ringwald 
579a19cd78SMatthias Ringwald         y[0].im = x[0].im + s14.im + s23.im;
589a19cd78SMatthias Ringwald 
59*4930cef6SMatthias Ringwald         y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1
60*4930cef6SMatthias Ringwald                           + s23.re * cos2 - d23.im * sin2;
619a19cd78SMatthias Ringwald 
62*4930cef6SMatthias Ringwald         y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1
63*4930cef6SMatthias Ringwald                           + s23.im * cos2 + d23.re * sin2;
649a19cd78SMatthias Ringwald 
65*4930cef6SMatthias Ringwald         y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2
66*4930cef6SMatthias Ringwald                           + s23.re * cos1 + d23.im * sin1;
679a19cd78SMatthias Ringwald 
68*4930cef6SMatthias Ringwald         y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2
69*4930cef6SMatthias Ringwald                           + s23.im * cos1 - d23.re * sin1;
709a19cd78SMatthias Ringwald 
71*4930cef6SMatthias Ringwald         y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2
72*4930cef6SMatthias Ringwald                           + s23.re * cos1 - d23.im * sin1;
739a19cd78SMatthias Ringwald 
74*4930cef6SMatthias Ringwald         y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2
75*4930cef6SMatthias Ringwald                           + s23.im * cos1 + d23.re * sin1;
769a19cd78SMatthias Ringwald 
77*4930cef6SMatthias Ringwald         y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1
78*4930cef6SMatthias Ringwald                           + s23.re * cos2 + d23.im * sin2;
799a19cd78SMatthias Ringwald 
80*4930cef6SMatthias Ringwald         y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1
81*4930cef6SMatthias Ringwald                           + s23.im * cos2 - d23.re * sin2;
829a19cd78SMatthias Ringwald     }
839a19cd78SMatthias Ringwald }
84*4930cef6SMatthias Ringwald #endif /* fft_5 */
859a19cd78SMatthias Ringwald 
869a19cd78SMatthias Ringwald /**
87*4930cef6SMatthias Ringwald  * FFT Butterfly 3 Points
889a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
899a19cd78SMatthias Ringwald  * twiddles        Twiddles factors, determine size of transform
909a19cd78SMatthias Ringwald  * n               Number of interleaved transforms
919a19cd78SMatthias Ringwald  */
92*4930cef6SMatthias Ringwald #ifndef fft_bf3
93*4930cef6SMatthias Ringwald LC3_HOT static inline void fft_bf3(
94*4930cef6SMatthias Ringwald     const struct lc3_fft_bf3_twiddles *twiddles,
959a19cd78SMatthias Ringwald     const struct lc3_complex *x, struct lc3_complex *y, int n)
969a19cd78SMatthias Ringwald {
979a19cd78SMatthias Ringwald     int n3 = twiddles->n3;
989a19cd78SMatthias Ringwald     const struct lc3_complex (*w0)[2] = twiddles->t;
999a19cd78SMatthias Ringwald     const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3;
1009a19cd78SMatthias Ringwald 
1019a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3;
1029a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3;
1039a19cd78SMatthias Ringwald 
104*4930cef6SMatthias Ringwald     for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3)
1059a19cd78SMatthias Ringwald         for (int j = 0; j < n3; j++, x0++, x1++, x2++) {
1069a19cd78SMatthias Ringwald 
107*4930cef6SMatthias Ringwald             y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im
108*4930cef6SMatthias Ringwald                               + x2->re * w0[j][1].re - x2->im * w0[j][1].im;
1099a19cd78SMatthias Ringwald 
110*4930cef6SMatthias Ringwald             y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im
111*4930cef6SMatthias Ringwald                               + x2->im * w0[j][1].re + x2->re * w0[j][1].im;
1129a19cd78SMatthias Ringwald 
113*4930cef6SMatthias Ringwald             y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im
114*4930cef6SMatthias Ringwald                               + x2->re * w1[j][1].re - x2->im * w1[j][1].im;
1159a19cd78SMatthias Ringwald 
116*4930cef6SMatthias Ringwald             y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im
117*4930cef6SMatthias Ringwald                               + x2->im * w1[j][1].re + x2->re * w1[j][1].im;
1189a19cd78SMatthias Ringwald 
119*4930cef6SMatthias Ringwald             y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im
120*4930cef6SMatthias Ringwald                               + x2->re * w2[j][1].re - x2->im * w2[j][1].im;
1219a19cd78SMatthias Ringwald 
122*4930cef6SMatthias Ringwald             y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im
123*4930cef6SMatthias Ringwald                               + x2->im * w2[j][1].re + x2->re * w2[j][1].im;
1249a19cd78SMatthias Ringwald         }
1259a19cd78SMatthias Ringwald }
126*4930cef6SMatthias Ringwald #endif /* fft_bf3 */
1279a19cd78SMatthias Ringwald 
1289a19cd78SMatthias Ringwald /**
129*4930cef6SMatthias Ringwald  * FFT Butterfly 2 Points
1309a19cd78SMatthias Ringwald  * twiddles        Twiddles factors, determine size of transform
1319a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
1329a19cd78SMatthias Ringwald  * n               Number of interleaved transforms
1339a19cd78SMatthias Ringwald  */
134*4930cef6SMatthias Ringwald #ifndef fft_bf2
135*4930cef6SMatthias Ringwald LC3_HOT static inline void fft_bf2(
136*4930cef6SMatthias Ringwald     const struct lc3_fft_bf2_twiddles *twiddles,
1379a19cd78SMatthias Ringwald     const struct lc3_complex *x, struct lc3_complex *y, int n)
1389a19cd78SMatthias Ringwald {
1399a19cd78SMatthias Ringwald     int n2 = twiddles->n2;
1409a19cd78SMatthias Ringwald     const struct lc3_complex *w = twiddles->t;
1419a19cd78SMatthias Ringwald 
1429a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x, *x1 = x0 + n*n2;
1439a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n2;
1449a19cd78SMatthias Ringwald 
1459a19cd78SMatthias Ringwald     for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) {
1469a19cd78SMatthias Ringwald 
1479a19cd78SMatthias Ringwald         for (int j = 0; j < n2; j++, x0++, x1++) {
1489a19cd78SMatthias Ringwald 
149*4930cef6SMatthias Ringwald             y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im;
150*4930cef6SMatthias Ringwald             y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im;
1519a19cd78SMatthias Ringwald 
152*4930cef6SMatthias Ringwald             y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im;
153*4930cef6SMatthias Ringwald             y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im;
1549a19cd78SMatthias Ringwald         }
1559a19cd78SMatthias Ringwald     }
1569a19cd78SMatthias Ringwald }
157*4930cef6SMatthias Ringwald #endif /* fft_bf2 */
1589a19cd78SMatthias Ringwald 
1599a19cd78SMatthias Ringwald /**
1609a19cd78SMatthias Ringwald  * Perform FFT
1619a19cd78SMatthias Ringwald  * x, y0, y1       Input, and 2 scratch buffers of size `n`
1629a19cd78SMatthias Ringwald  * n               Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240
1639a19cd78SMatthias Ringwald  * return          The buffer `y0` or `y1` that hold the result
1649a19cd78SMatthias Ringwald  *
1659a19cd78SMatthias Ringwald  * Input `x` can be the same as the `y0` second scratch buffer
1669a19cd78SMatthias Ringwald  */
167*4930cef6SMatthias Ringwald static struct lc3_complex *fft(const struct lc3_complex *x, int n,
1689a19cd78SMatthias Ringwald     struct lc3_complex *y0, struct lc3_complex *y1)
1699a19cd78SMatthias Ringwald {
1709a19cd78SMatthias Ringwald     struct lc3_complex *y[2] = { y1, y0 };
1719a19cd78SMatthias Ringwald     int i2, i3, is = 0;
1729a19cd78SMatthias Ringwald 
1739a19cd78SMatthias Ringwald     /* The number of points `n` can be decomposed as :
1749a19cd78SMatthias Ringwald      *
1759a19cd78SMatthias Ringwald      *   n = 5^1 * 3^n3 * 2^n2
1769a19cd78SMatthias Ringwald      *
1779a19cd78SMatthias Ringwald      *   for n = 40, 80, 160        n3 = 0, n2 = [3..5]
1789a19cd78SMatthias Ringwald      *       n = 30, 60, 120, 240   n3 = 1, n2 = [1..4]
1799a19cd78SMatthias Ringwald      *       n = 90, 180            n3 = 2, n2 = [1..2]
1809a19cd78SMatthias Ringwald      *
1819a19cd78SMatthias Ringwald      * Note that the expression `n & (n-1) == 0` is equivalent
1829a19cd78SMatthias Ringwald      * to the check that `n` is a power of 2. */
1839a19cd78SMatthias Ringwald 
184*4930cef6SMatthias Ringwald     fft_5(x, y[is], n /= 5);
1859a19cd78SMatthias Ringwald 
1869a19cd78SMatthias Ringwald     for (i3 = 0; n & (n-1); i3++, is ^= 1)
187*4930cef6SMatthias Ringwald         fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3);
1889a19cd78SMatthias Ringwald 
1899a19cd78SMatthias Ringwald     for (i2 = 0; n > 1; i2++, is ^= 1)
190*4930cef6SMatthias Ringwald         fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1);
1919a19cd78SMatthias Ringwald 
1929a19cd78SMatthias Ringwald     return y[is];
1939a19cd78SMatthias Ringwald }
1949a19cd78SMatthias Ringwald 
1959a19cd78SMatthias Ringwald 
1969a19cd78SMatthias Ringwald /* ----------------------------------------------------------------------------
1979a19cd78SMatthias Ringwald  *  MDCT processing
1989a19cd78SMatthias Ringwald  * -------------------------------------------------------------------------- */
1999a19cd78SMatthias Ringwald 
2009a19cd78SMatthias Ringwald /**
2019a19cd78SMatthias Ringwald  * Windowing of samples before MDCT
2029a19cd78SMatthias Ringwald  * dt, sr          Duration and samplerate (size of the transform)
203*4930cef6SMatthias Ringwald  * x, y            Input current and delayed samples
204*4930cef6SMatthias Ringwald  * y, d            Output windowed samples, and delayed ones
2059a19cd78SMatthias Ringwald  */
206*4930cef6SMatthias Ringwald LC3_HOT static void mdct_window(enum lc3_dt dt, enum lc3_srate sr,
207*4930cef6SMatthias Ringwald     const float *x, float *d, float *y)
2089a19cd78SMatthias Ringwald {
2099a19cd78SMatthias Ringwald     int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
2109a19cd78SMatthias Ringwald 
2119a19cd78SMatthias Ringwald     const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns;
2129a19cd78SMatthias Ringwald     const float *w2 = w1, *w3 = w2 + nd;
2139a19cd78SMatthias Ringwald 
214*4930cef6SMatthias Ringwald     const float *x0 = x + ns-nd, *x1 = x0;
2159a19cd78SMatthias Ringwald     float *y0 = y + ns/2, *y1 = y0;
216*4930cef6SMatthias Ringwald     float *d0 = d, *d1 = d + nd;
2179a19cd78SMatthias Ringwald 
218*4930cef6SMatthias Ringwald     while (x1 > x) {
219*4930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
220*4930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
2219a19cd78SMatthias Ringwald 
222*4930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
223*4930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
224*4930cef6SMatthias Ringwald     }
2259a19cd78SMatthias Ringwald 
226*4930cef6SMatthias Ringwald     for (x1 += ns; x0 < x1; ) {
227*4930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
228*4930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
229*4930cef6SMatthias Ringwald 
230*4930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
231*4930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
232*4930cef6SMatthias Ringwald     }
2339a19cd78SMatthias Ringwald }
2349a19cd78SMatthias Ringwald 
2359a19cd78SMatthias Ringwald /**
2369a19cd78SMatthias Ringwald  * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT
2379a19cd78SMatthias Ringwald  * def             Size and twiddles factors
2389a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
2399a19cd78SMatthias Ringwald  *
2409a19cd78SMatthias Ringwald  * `x` and y` can be the same buffer
2419a19cd78SMatthias Ringwald  */
242*4930cef6SMatthias Ringwald LC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def,
2439a19cd78SMatthias Ringwald     const float *x, struct lc3_complex *y)
2449a19cd78SMatthias Ringwald {
2459a19cd78SMatthias Ringwald     int n4 = def->n4;
2469a19cd78SMatthias Ringwald 
2479a19cd78SMatthias Ringwald     const float *x0 = x, *x1 = x0 + 2*n4;
2489a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
2499a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n4;
2509a19cd78SMatthias Ringwald 
2519a19cd78SMatthias Ringwald     while (x0 < x1) {
2529a19cd78SMatthias Ringwald         struct lc3_complex u, uw = *(w0++);
2539a19cd78SMatthias Ringwald         u.re = - *(--x1) * uw.re + *x0 * uw.im;
2549a19cd78SMatthias Ringwald         u.im =   *(x0++) * uw.re + *x1 * uw.im;
2559a19cd78SMatthias Ringwald 
2569a19cd78SMatthias Ringwald         struct lc3_complex v, vw = *(--w1);
2579a19cd78SMatthias Ringwald         v.re = - *(--x1) * vw.im + *x0 * vw.re;
2589a19cd78SMatthias Ringwald         v.im = - *(x0++) * vw.im - *x1 * vw.re;
2599a19cd78SMatthias Ringwald 
2609a19cd78SMatthias Ringwald         *(y0++) = u;
2619a19cd78SMatthias Ringwald         *(--y1) = v;
2629a19cd78SMatthias Ringwald     }
2639a19cd78SMatthias Ringwald }
2649a19cd78SMatthias Ringwald 
2659a19cd78SMatthias Ringwald /**
2669a19cd78SMatthias Ringwald  * Post-rotate FFT N/4 points coefficients, resulting MDCT N points
2679a19cd78SMatthias Ringwald  * def             Size and twiddles factors
2689a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
2699a19cd78SMatthias Ringwald  * scale           Scale on output coefficients
2709a19cd78SMatthias Ringwald  *
2719a19cd78SMatthias Ringwald  * `x` and y` can be the same buffer
2729a19cd78SMatthias Ringwald  */
273*4930cef6SMatthias Ringwald LC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def,
2749a19cd78SMatthias Ringwald     const struct lc3_complex *x, float *y, float scale)
2759a19cd78SMatthias Ringwald {
2769a19cd78SMatthias Ringwald     int n4 = def->n4, n8 = n4 >> 1;
2779a19cd78SMatthias Ringwald 
2789a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1;
2799a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1;
2809a19cd78SMatthias Ringwald 
2819a19cd78SMatthias Ringwald     float *y0 = y + n4, *y1 = y0;
2829a19cd78SMatthias Ringwald 
2839a19cd78SMatthias Ringwald     for ( ; y1 > y; x0++, x1--, w0++, w1--) {
2849a19cd78SMatthias Ringwald 
2859a19cd78SMatthias Ringwald         float u0 = (x0->im * w0->im + x0->re * w0->re) * scale;
2869a19cd78SMatthias Ringwald         float u1 = (x1->re * w1->im - x1->im * w1->re) * scale;
2879a19cd78SMatthias Ringwald 
2889a19cd78SMatthias Ringwald         float v0 = (x0->re * w0->im - x0->im * w0->re) * scale;
2899a19cd78SMatthias Ringwald         float v1 = (x1->im * w1->im + x1->re * w1->re) * scale;
2909a19cd78SMatthias Ringwald 
2919a19cd78SMatthias Ringwald         *(y0++) = u0;  *(y0++) = u1;
2929a19cd78SMatthias Ringwald         *(--y1) = v0;  *(--y1) = v1;
2939a19cd78SMatthias Ringwald     }
2949a19cd78SMatthias Ringwald }
2959a19cd78SMatthias Ringwald 
2969a19cd78SMatthias Ringwald /**
2979a19cd78SMatthias Ringwald  * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT
2989a19cd78SMatthias Ringwald  * def             Size and twiddles factors
2999a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
3009a19cd78SMatthias Ringwald  *
301*4930cef6SMatthias Ringwald  * `x` and `y` can be the same buffer
302*4930cef6SMatthias Ringwald  * The real and imaginary parts of `y` are swapped,
303*4930cef6SMatthias Ringwald  * to operate on FFT instead of IFFT
3049a19cd78SMatthias Ringwald  */
305*4930cef6SMatthias Ringwald LC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def,
3069a19cd78SMatthias Ringwald     const float *x, struct lc3_complex *y)
3079a19cd78SMatthias Ringwald {
3089a19cd78SMatthias Ringwald     int n4 = def->n4;
3099a19cd78SMatthias Ringwald 
3109a19cd78SMatthias Ringwald     const float *x0 = x, *x1 = x0 + 2*n4;
3119a19cd78SMatthias Ringwald 
3129a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
3139a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n4;
3149a19cd78SMatthias Ringwald 
3159a19cd78SMatthias Ringwald     while (x0 < x1) {
3169a19cd78SMatthias Ringwald         float u0 = *(x0++), u1 = *(--x1);
3179a19cd78SMatthias Ringwald         float v0 = *(x0++), v1 = *(--x1);
3189a19cd78SMatthias Ringwald         struct lc3_complex uw = *(w0++), vw = *(--w1);
3199a19cd78SMatthias Ringwald 
320*4930cef6SMatthias Ringwald         (y0  )->re = - u0 * uw.re - u1 * uw.im;
321*4930cef6SMatthias Ringwald         (y0++)->im = - u1 * uw.re + u0 * uw.im;
3229a19cd78SMatthias Ringwald 
323*4930cef6SMatthias Ringwald         (--y1)->re = - v1 * vw.re - v0 * vw.im;
324*4930cef6SMatthias Ringwald         (  y1)->im = - v0 * vw.re + v1 * vw.im;
3259a19cd78SMatthias Ringwald     }
3269a19cd78SMatthias Ringwald }
3279a19cd78SMatthias Ringwald 
3289a19cd78SMatthias Ringwald /**
3299a19cd78SMatthias Ringwald  * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points
3309a19cd78SMatthias Ringwald  * def             Size and twiddles factors
3319a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
3329a19cd78SMatthias Ringwald  * scale           Scale on output coefficients
3339a19cd78SMatthias Ringwald  *
3349a19cd78SMatthias Ringwald  * `x` and y` can be the same buffer
335*4930cef6SMatthias Ringwald  * The real and imaginary parts of `x` are swapped,
336*4930cef6SMatthias Ringwald  * to operate on FFT instead of IFFT
3379a19cd78SMatthias Ringwald  */
338*4930cef6SMatthias Ringwald LC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def,
3399a19cd78SMatthias Ringwald     const struct lc3_complex *x, float *y, float scale)
3409a19cd78SMatthias Ringwald {
3419a19cd78SMatthias Ringwald     int n4 = def->n4;
3429a19cd78SMatthias Ringwald 
3439a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
3449a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x, *x1 = x0 + n4;
3459a19cd78SMatthias Ringwald 
3469a19cd78SMatthias Ringwald     float *y0 = y, *y1 = y0 + 2*n4;
3479a19cd78SMatthias Ringwald 
3489a19cd78SMatthias Ringwald     while (x0 < x1) {
3499a19cd78SMatthias Ringwald         struct lc3_complex uz = *(x0++), vz = *(--x1);
3509a19cd78SMatthias Ringwald         struct lc3_complex uw = *(w0++), vw = *(--w1);
3519a19cd78SMatthias Ringwald 
352*4930cef6SMatthias Ringwald         *(y0++) = (uz.re * uw.im - uz.im * uw.re) * scale;
353*4930cef6SMatthias Ringwald         *(--y1) = (uz.re * uw.re + uz.im * uw.im) * scale;
3549a19cd78SMatthias Ringwald 
355*4930cef6SMatthias Ringwald         *(--y1) = (vz.re * vw.im - vz.im * vw.re) * scale;
356*4930cef6SMatthias Ringwald         *(y0++) = (vz.re * vw.re + vz.im * vw.im) * scale;
3579a19cd78SMatthias Ringwald     }
3589a19cd78SMatthias Ringwald }
3599a19cd78SMatthias Ringwald 
3609a19cd78SMatthias Ringwald /**
3619a19cd78SMatthias Ringwald  * Apply windowing of samples
3629a19cd78SMatthias Ringwald  * dt, sr          Duration and samplerate
3639a19cd78SMatthias Ringwald  * x, d            Middle half of IMDCT coefficients and delayed samples
3649a19cd78SMatthias Ringwald  * y, d            Output samples and delayed ones
3659a19cd78SMatthias Ringwald  */
366*4930cef6SMatthias Ringwald LC3_HOT static void imdct_window(enum lc3_dt dt, enum lc3_srate sr,
3679a19cd78SMatthias Ringwald     const float *x, float *d, float *y)
3689a19cd78SMatthias Ringwald {
3699a19cd78SMatthias Ringwald     /* The full MDCT coefficients is given by symmetry :
3709a19cd78SMatthias Ringwald      *   T[   0 ..  n/4-1] = -half[n/4-1 .. 0    ]
3719a19cd78SMatthias Ringwald      *   T[ n/4 ..  n/2-1] =  half[0     .. n/4-1]
3729a19cd78SMatthias Ringwald      *   T[ n/2 .. 3n/4-1] =  half[n/4   .. n/2-1]
3739a19cd78SMatthias Ringwald      *   T[3n/4 ..    n-1] =  half[n/2-1 .. n/4  ]  */
3749a19cd78SMatthias Ringwald 
3759a19cd78SMatthias Ringwald     int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr);
3769a19cd78SMatthias Ringwald     const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0;
3779a19cd78SMatthias Ringwald 
3789a19cd78SMatthias Ringwald     const float *x0 = d + nd-n4, *x1 = x0;
3799a19cd78SMatthias Ringwald     float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d;
3809a19cd78SMatthias Ringwald 
3819a19cd78SMatthias Ringwald     while (y0 > y) {
3829a19cd78SMatthias Ringwald         *(--y0) = *(--x0) - *(x  ) * *(w1++);
3839a19cd78SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
384*4930cef6SMatthias Ringwald 
385*4930cef6SMatthias Ringwald         *(--y0) = *(--x0) - *(x  ) * *(w1++);
386*4930cef6SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
3879a19cd78SMatthias Ringwald     }
3889a19cd78SMatthias Ringwald 
3899a19cd78SMatthias Ringwald     while (y1 < y + nd) {
3909a19cd78SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
391*4930cef6SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
3929a19cd78SMatthias Ringwald     }
3939a19cd78SMatthias Ringwald 
3949a19cd78SMatthias Ringwald     while (y1 < y + 2*n4) {
3959a19cd78SMatthias Ringwald         *(y1++) = *(x  ) * *(--w0);
3969a19cd78SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
397*4930cef6SMatthias Ringwald 
398*4930cef6SMatthias Ringwald         *(y1++) = *(x  ) * *(--w0);
399*4930cef6SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
4009a19cd78SMatthias Ringwald     }
4019a19cd78SMatthias Ringwald 
4029a19cd78SMatthias Ringwald     while (y2 > y3) {
4039a19cd78SMatthias Ringwald         *(y3++) = *(x  ) * *(--w0);
4049a19cd78SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
405*4930cef6SMatthias Ringwald 
406*4930cef6SMatthias Ringwald         *(y3++) = *(x  ) * *(--w0);
407*4930cef6SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
4089a19cd78SMatthias Ringwald     }
4099a19cd78SMatthias Ringwald }
4109a19cd78SMatthias Ringwald 
4119a19cd78SMatthias Ringwald /**
4129a19cd78SMatthias Ringwald  * Forward MDCT transformation
4139a19cd78SMatthias Ringwald  */
4149a19cd78SMatthias Ringwald void lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr,
415*4930cef6SMatthias Ringwald     enum lc3_srate sr_dst, const float *x, float *d, float *y)
4169a19cd78SMatthias Ringwald {
4179a19cd78SMatthias Ringwald     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
4189a19cd78SMatthias Ringwald     int nf = LC3_NS(dt, sr_dst);
4199a19cd78SMatthias Ringwald     int ns = LC3_NS(dt, sr);
4209a19cd78SMatthias Ringwald 
421*4930cef6SMatthias Ringwald     struct lc3_complex buffer[ns/2];
422*4930cef6SMatthias Ringwald     struct lc3_complex *z = (struct lc3_complex *)y;
423*4930cef6SMatthias Ringwald     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
4249a19cd78SMatthias Ringwald 
425*4930cef6SMatthias Ringwald     mdct_window(dt, sr, x, d, u.f);
4269a19cd78SMatthias Ringwald 
4279a19cd78SMatthias Ringwald     mdct_pre_fft(rot, u.f, u.z);
428*4930cef6SMatthias Ringwald     u.z = fft(u.z, ns/2, u.z, z);
4299a19cd78SMatthias Ringwald     mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) ));
4309a19cd78SMatthias Ringwald }
4319a19cd78SMatthias Ringwald 
4329a19cd78SMatthias Ringwald /**
4339a19cd78SMatthias Ringwald  * Inverse MDCT transformation
4349a19cd78SMatthias Ringwald  */
4359a19cd78SMatthias Ringwald void lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr,
4369a19cd78SMatthias Ringwald     enum lc3_srate sr_src, const float *x, float *d, float *y)
4379a19cd78SMatthias Ringwald {
4389a19cd78SMatthias Ringwald     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
4399a19cd78SMatthias Ringwald     int nf = LC3_NS(dt, sr_src);
4409a19cd78SMatthias Ringwald     int ns = LC3_NS(dt, sr);
4419a19cd78SMatthias Ringwald 
4429a19cd78SMatthias Ringwald     struct lc3_complex buffer[ns/2];
4439a19cd78SMatthias Ringwald     struct lc3_complex *z = (struct lc3_complex *)y;
4449a19cd78SMatthias Ringwald     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
4459a19cd78SMatthias Ringwald 
4469a19cd78SMatthias Ringwald     imdct_pre_fft(rot, x, z);
447*4930cef6SMatthias Ringwald     z = fft(z, ns/2, z, u.z);
4489a19cd78SMatthias Ringwald     imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf));
4499a19cd78SMatthias Ringwald 
4509a19cd78SMatthias Ringwald     imdct_window(dt, sr, u.f, d, y);
4519a19cd78SMatthias Ringwald }
452