xref: /btstack/3rd-party/lc3-google/src/mdct.c (revision 6897da5c53aac5b1f90f41b5b15d0bd43d61dfff)
19a19cd78SMatthias Ringwald /******************************************************************************
29a19cd78SMatthias Ringwald  *
34930cef6SMatthias Ringwald  *  Copyright 2022 Google LLC
49a19cd78SMatthias Ringwald  *
59a19cd78SMatthias Ringwald  *  Licensed under the Apache License, Version 2.0 (the "License");
69a19cd78SMatthias Ringwald  *  you may not use this file except in compliance with the License.
79a19cd78SMatthias Ringwald  *  You may obtain a copy of the License at:
89a19cd78SMatthias Ringwald  *
99a19cd78SMatthias Ringwald  *  http://www.apache.org/licenses/LICENSE-2.0
109a19cd78SMatthias Ringwald  *
119a19cd78SMatthias Ringwald  *  Unless required by applicable law or agreed to in writing, software
129a19cd78SMatthias Ringwald  *  distributed under the License is distributed on an "AS IS" BASIS,
139a19cd78SMatthias Ringwald  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
149a19cd78SMatthias Ringwald  *  See the License for the specific language governing permissions and
159a19cd78SMatthias Ringwald  *  limitations under the License.
169a19cd78SMatthias Ringwald  *
179a19cd78SMatthias Ringwald  ******************************************************************************/
189a19cd78SMatthias Ringwald 
194c4eb519SMatthias Ringwald #include "mdct.h"
209a19cd78SMatthias Ringwald #include "tables.h"
219a19cd78SMatthias Ringwald 
224930cef6SMatthias Ringwald #include "mdct_neon.h"
234930cef6SMatthias Ringwald 
249a19cd78SMatthias Ringwald 
259a19cd78SMatthias Ringwald /* ----------------------------------------------------------------------------
269a19cd78SMatthias Ringwald  *  FFT processing
279a19cd78SMatthias Ringwald  * -------------------------------------------------------------------------- */
289a19cd78SMatthias Ringwald 
299a19cd78SMatthias Ringwald /**
304930cef6SMatthias Ringwald  * FFT 5 Points
319a19cd78SMatthias Ringwald  * x, y            Input and output coefficients, of size 5xn
324930cef6SMatthias Ringwald  * n               Number of interleaved transform to perform (n % 2 = 0)
339a19cd78SMatthias Ringwald  */
344930cef6SMatthias Ringwald #ifndef fft_5
fft_5(const struct lc3_complex * x,struct lc3_complex * y,int n)354930cef6SMatthias Ringwald LC3_HOT static inline void fft_5(
369a19cd78SMatthias Ringwald     const struct lc3_complex *x, struct lc3_complex *y, int n)
379a19cd78SMatthias Ringwald {
389a19cd78SMatthias Ringwald     static const float cos1 =  0.3090169944;  /* cos(-2Pi 1/5) */
399a19cd78SMatthias Ringwald     static const float cos2 = -0.8090169944;  /* cos(-2Pi 2/5) */
409a19cd78SMatthias Ringwald 
419a19cd78SMatthias Ringwald     static const float sin1 = -0.9510565163;  /* sin(-2Pi 1/5) */
429a19cd78SMatthias Ringwald     static const float sin2 = -0.5877852523;  /* sin(-2Pi 2/5) */
439a19cd78SMatthias Ringwald 
449a19cd78SMatthias Ringwald     for (int i = 0; i < n; i++, x++, y+= 5) {
459a19cd78SMatthias Ringwald 
469a19cd78SMatthias Ringwald         struct lc3_complex s14 =
479a19cd78SMatthias Ringwald             { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im };
489a19cd78SMatthias Ringwald         struct lc3_complex d14 =
499a19cd78SMatthias Ringwald             { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im };
509a19cd78SMatthias Ringwald 
519a19cd78SMatthias Ringwald         struct lc3_complex s23 =
529a19cd78SMatthias Ringwald             { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im };
539a19cd78SMatthias Ringwald         struct lc3_complex d23 =
549a19cd78SMatthias Ringwald             { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im };
559a19cd78SMatthias Ringwald 
569a19cd78SMatthias Ringwald         y[0].re = x[0].re + s14.re + s23.re;
574930cef6SMatthias Ringwald 
589a19cd78SMatthias Ringwald         y[0].im = x[0].im + s14.im + s23.im;
599a19cd78SMatthias Ringwald 
604930cef6SMatthias Ringwald         y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1
614930cef6SMatthias Ringwald                           + s23.re * cos2 - d23.im * sin2;
629a19cd78SMatthias Ringwald 
634930cef6SMatthias Ringwald         y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1
644930cef6SMatthias Ringwald                           + s23.im * cos2 + d23.re * sin2;
659a19cd78SMatthias Ringwald 
664930cef6SMatthias Ringwald         y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2
674930cef6SMatthias Ringwald                           + s23.re * cos1 + d23.im * sin1;
689a19cd78SMatthias Ringwald 
694930cef6SMatthias Ringwald         y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2
704930cef6SMatthias Ringwald                           + s23.im * cos1 - d23.re * sin1;
719a19cd78SMatthias Ringwald 
724930cef6SMatthias Ringwald         y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2
734930cef6SMatthias Ringwald                           + s23.re * cos1 - d23.im * sin1;
749a19cd78SMatthias Ringwald 
754930cef6SMatthias Ringwald         y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2
764930cef6SMatthias Ringwald                           + s23.im * cos1 + d23.re * sin1;
779a19cd78SMatthias Ringwald 
784930cef6SMatthias Ringwald         y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1
794930cef6SMatthias Ringwald                           + s23.re * cos2 + d23.im * sin2;
809a19cd78SMatthias Ringwald 
814930cef6SMatthias Ringwald         y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1
824930cef6SMatthias Ringwald                           + s23.im * cos2 - d23.re * sin2;
839a19cd78SMatthias Ringwald     }
849a19cd78SMatthias Ringwald }
854930cef6SMatthias Ringwald #endif /* fft_5 */
869a19cd78SMatthias Ringwald 
879a19cd78SMatthias Ringwald /**
884930cef6SMatthias Ringwald  * FFT Butterfly 3 Points
899a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
909a19cd78SMatthias Ringwald  * twiddles        Twiddles factors, determine size of transform
919a19cd78SMatthias Ringwald  * n               Number of interleaved transforms
929a19cd78SMatthias Ringwald  */
934930cef6SMatthias Ringwald #ifndef fft_bf3
fft_bf3(const struct lc3_fft_bf3_twiddles * twiddles,const struct lc3_complex * x,struct lc3_complex * y,int n)944930cef6SMatthias Ringwald LC3_HOT static inline void fft_bf3(
954930cef6SMatthias Ringwald     const struct lc3_fft_bf3_twiddles *twiddles,
969a19cd78SMatthias Ringwald     const struct lc3_complex *x, struct lc3_complex *y, int n)
979a19cd78SMatthias Ringwald {
989a19cd78SMatthias Ringwald     int n3 = twiddles->n3;
999a19cd78SMatthias Ringwald     const struct lc3_complex (*w0)[2] = twiddles->t;
1009a19cd78SMatthias Ringwald     const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3;
1019a19cd78SMatthias Ringwald 
1029a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3;
1039a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3;
1049a19cd78SMatthias Ringwald 
1054930cef6SMatthias Ringwald     for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3)
1069a19cd78SMatthias Ringwald         for (int j = 0; j < n3; j++, x0++, x1++, x2++) {
1079a19cd78SMatthias Ringwald 
1084930cef6SMatthias Ringwald             y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im
1094930cef6SMatthias Ringwald                               + x2->re * w0[j][1].re - x2->im * w0[j][1].im;
1109a19cd78SMatthias Ringwald 
1114930cef6SMatthias Ringwald             y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im
1124930cef6SMatthias Ringwald                               + x2->im * w0[j][1].re + x2->re * w0[j][1].im;
1139a19cd78SMatthias Ringwald 
1144930cef6SMatthias Ringwald             y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im
1154930cef6SMatthias Ringwald                               + x2->re * w1[j][1].re - x2->im * w1[j][1].im;
1169a19cd78SMatthias Ringwald 
1174930cef6SMatthias Ringwald             y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im
1184930cef6SMatthias Ringwald                               + x2->im * w1[j][1].re + x2->re * w1[j][1].im;
1199a19cd78SMatthias Ringwald 
1204930cef6SMatthias Ringwald             y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im
1214930cef6SMatthias Ringwald                               + x2->re * w2[j][1].re - x2->im * w2[j][1].im;
1229a19cd78SMatthias Ringwald 
1234930cef6SMatthias Ringwald             y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im
1244930cef6SMatthias Ringwald                               + x2->im * w2[j][1].re + x2->re * w2[j][1].im;
1259a19cd78SMatthias Ringwald         }
1269a19cd78SMatthias Ringwald }
1274930cef6SMatthias Ringwald #endif /* fft_bf3 */
1289a19cd78SMatthias Ringwald 
1299a19cd78SMatthias Ringwald /**
1304930cef6SMatthias Ringwald  * FFT Butterfly 2 Points
1319a19cd78SMatthias Ringwald  * twiddles        Twiddles factors, determine size of transform
1329a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
1339a19cd78SMatthias Ringwald  * n               Number of interleaved transforms
1349a19cd78SMatthias Ringwald  */
1354930cef6SMatthias Ringwald #ifndef fft_bf2
fft_bf2(const struct lc3_fft_bf2_twiddles * twiddles,const struct lc3_complex * x,struct lc3_complex * y,int n)1364930cef6SMatthias Ringwald LC3_HOT static inline void fft_bf2(
1374930cef6SMatthias Ringwald     const struct lc3_fft_bf2_twiddles *twiddles,
1389a19cd78SMatthias Ringwald     const struct lc3_complex *x, struct lc3_complex *y, int n)
1399a19cd78SMatthias Ringwald {
1409a19cd78SMatthias Ringwald     int n2 = twiddles->n2;
1419a19cd78SMatthias Ringwald     const struct lc3_complex *w = twiddles->t;
1429a19cd78SMatthias Ringwald 
1439a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x, *x1 = x0 + n*n2;
1449a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n2;
1459a19cd78SMatthias Ringwald 
1469a19cd78SMatthias Ringwald     for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) {
1479a19cd78SMatthias Ringwald 
1489a19cd78SMatthias Ringwald         for (int j = 0; j < n2; j++, x0++, x1++) {
1499a19cd78SMatthias Ringwald 
1504930cef6SMatthias Ringwald             y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im;
1514930cef6SMatthias Ringwald             y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im;
1529a19cd78SMatthias Ringwald 
1534930cef6SMatthias Ringwald             y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im;
1544930cef6SMatthias Ringwald             y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im;
1559a19cd78SMatthias Ringwald         }
1569a19cd78SMatthias Ringwald     }
1579a19cd78SMatthias Ringwald }
1584930cef6SMatthias Ringwald #endif /* fft_bf2 */
1599a19cd78SMatthias Ringwald 
1609a19cd78SMatthias Ringwald /**
1619a19cd78SMatthias Ringwald  * Perform FFT
1629a19cd78SMatthias Ringwald  * x, y0, y1       Input, and 2 scratch buffers of size `n`
163*6897da5cSDirk Helbig  * n               Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240, 480
1649a19cd78SMatthias Ringwald  * return          The buffer `y0` or `y1` that hold the result
1659a19cd78SMatthias Ringwald  *
1669a19cd78SMatthias Ringwald  * Input `x` can be the same as the `y0` second scratch buffer
1679a19cd78SMatthias Ringwald  */
fft(const struct lc3_complex * x,int n,struct lc3_complex * y0,struct lc3_complex * y1)1684930cef6SMatthias Ringwald static struct lc3_complex *fft(const struct lc3_complex *x, int n,
1699a19cd78SMatthias Ringwald     struct lc3_complex *y0, struct lc3_complex *y1)
1709a19cd78SMatthias Ringwald {
1719a19cd78SMatthias Ringwald     struct lc3_complex *y[2] = { y1, y0 };
1729a19cd78SMatthias Ringwald     int i2, i3, is = 0;
1739a19cd78SMatthias Ringwald 
1749a19cd78SMatthias Ringwald     /* The number of points `n` can be decomposed as :
1759a19cd78SMatthias Ringwald      *
1769a19cd78SMatthias Ringwald      *   n = 5^1 * 3^n3 * 2^n2
1779a19cd78SMatthias Ringwald      *
178*6897da5cSDirk Helbig      *   for n = 10, 20, 40, 80, 160    n3 = 0, n2 = [1..5]
179*6897da5cSDirk Helbig      *       n = 30, 60, 120, 240, 480  n3 = 1, n2 = [1..5]
1809a19cd78SMatthias Ringwald      *       n = 90, 180                n3 = 2, n2 = [1..2]
1819a19cd78SMatthias Ringwald      *
1829a19cd78SMatthias Ringwald      * Note that the expression `n & (n-1) == 0` is equivalent
1839a19cd78SMatthias Ringwald      * to the check that `n` is a power of 2. */
1849a19cd78SMatthias Ringwald 
1854930cef6SMatthias Ringwald     fft_5(x, y[is], n /= 5);
1869a19cd78SMatthias Ringwald 
1879a19cd78SMatthias Ringwald     for (i3 = 0; n & (n-1); i3++, is ^= 1)
1884930cef6SMatthias Ringwald         fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3);
1899a19cd78SMatthias Ringwald 
1909a19cd78SMatthias Ringwald     for (i2 = 0; n > 1; i2++, is ^= 1)
1914930cef6SMatthias Ringwald         fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1);
1929a19cd78SMatthias Ringwald 
1939a19cd78SMatthias Ringwald     return y[is];
1949a19cd78SMatthias Ringwald }
1959a19cd78SMatthias Ringwald 
1969a19cd78SMatthias Ringwald 
1979a19cd78SMatthias Ringwald /* ----------------------------------------------------------------------------
1989a19cd78SMatthias Ringwald  *  MDCT processing
1999a19cd78SMatthias Ringwald  * -------------------------------------------------------------------------- */
2009a19cd78SMatthias Ringwald 
2019a19cd78SMatthias Ringwald /**
2029a19cd78SMatthias Ringwald  * Windowing of samples before MDCT
203*6897da5cSDirk Helbig  * dt, sr          Duration and samplerate
2044930cef6SMatthias Ringwald  * x, y            Input current and delayed samples
2054930cef6SMatthias Ringwald  * y, d            Output windowed samples, and delayed ones
2069a19cd78SMatthias Ringwald  */
mdct_window(enum lc3_dt dt,enum lc3_srate sr,const float * x,float * d,float * y)207*6897da5cSDirk Helbig LC3_HOT static void mdct_window(
208*6897da5cSDirk Helbig     enum lc3_dt dt, enum lc3_srate sr,
2094930cef6SMatthias Ringwald     const float *x, float *d, float *y)
2109a19cd78SMatthias Ringwald {
211*6897da5cSDirk Helbig     const float *win = lc3_mdct_win[dt][sr];
212*6897da5cSDirk Helbig     int ns = lc3_ns(dt, sr), nd = lc3_nd(dt, sr);
2139a19cd78SMatthias Ringwald 
214*6897da5cSDirk Helbig     const float *w0 = win, *w1 = w0 + ns;
2159a19cd78SMatthias Ringwald     const float *w2 = w1, *w3 = w2 + nd;
2169a19cd78SMatthias Ringwald 
2174930cef6SMatthias Ringwald     const float *x0 = x + ns-nd, *x1 = x0;
2189a19cd78SMatthias Ringwald     float *y0 = y + ns/2, *y1 = y0;
2194930cef6SMatthias Ringwald     float *d0 = d, *d1 = d + nd;
2209a19cd78SMatthias Ringwald 
2214930cef6SMatthias Ringwald     while (x1 > x) {
2224930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
2234930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
2249a19cd78SMatthias Ringwald 
2254930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
2264930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
2274930cef6SMatthias Ringwald     }
2289a19cd78SMatthias Ringwald 
2294930cef6SMatthias Ringwald     for (x1 += ns; x0 < x1; ) {
2304930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
2314930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
2324930cef6SMatthias Ringwald 
2334930cef6SMatthias Ringwald         *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
2344930cef6SMatthias Ringwald         *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
2354930cef6SMatthias Ringwald     }
2369a19cd78SMatthias Ringwald }
2379a19cd78SMatthias Ringwald 
2389a19cd78SMatthias Ringwald /**
2399a19cd78SMatthias Ringwald  * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT
2409a19cd78SMatthias Ringwald  * def             Size and twiddles factors
2419a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
2429a19cd78SMatthias Ringwald  *
2439a19cd78SMatthias Ringwald  * `x` and y` can be the same buffer
2449a19cd78SMatthias Ringwald  */
mdct_pre_fft(const struct lc3_mdct_rot_def * def,const float * x,struct lc3_complex * y)2454930cef6SMatthias Ringwald LC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def,
2469a19cd78SMatthias Ringwald     const float *x, struct lc3_complex *y)
2479a19cd78SMatthias Ringwald {
2489a19cd78SMatthias Ringwald     int n4 = def->n4;
2499a19cd78SMatthias Ringwald 
2509a19cd78SMatthias Ringwald     const float *x0 = x, *x1 = x0 + 2*n4;
2519a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
2529a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n4;
2539a19cd78SMatthias Ringwald 
2549a19cd78SMatthias Ringwald     while (x0 < x1) {
2559a19cd78SMatthias Ringwald         struct lc3_complex u, uw = *(w0++);
2569a19cd78SMatthias Ringwald         u.re = - *(--x1) * uw.re + *x0 * uw.im;
2579a19cd78SMatthias Ringwald         u.im =   *(x0++) * uw.re + *x1 * uw.im;
2589a19cd78SMatthias Ringwald 
2599a19cd78SMatthias Ringwald         struct lc3_complex v, vw = *(--w1);
2609a19cd78SMatthias Ringwald         v.re = - *(--x1) * vw.im + *x0 * vw.re;
2619a19cd78SMatthias Ringwald         v.im = - *(x0++) * vw.im - *x1 * vw.re;
2629a19cd78SMatthias Ringwald 
2639a19cd78SMatthias Ringwald         *(y0++) = u;
2649a19cd78SMatthias Ringwald         *(--y1) = v;
2659a19cd78SMatthias Ringwald     }
2669a19cd78SMatthias Ringwald }
2679a19cd78SMatthias Ringwald 
2689a19cd78SMatthias Ringwald /**
2699a19cd78SMatthias Ringwald  * Post-rotate FFT N/4 points coefficients, resulting MDCT N points
2709a19cd78SMatthias Ringwald  * def             Size and twiddles factors
2719a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
2729a19cd78SMatthias Ringwald  *
2739a19cd78SMatthias Ringwald  * `x` and y` can be the same buffer
2749a19cd78SMatthias Ringwald  */
mdct_post_fft(const struct lc3_mdct_rot_def * def,const struct lc3_complex * x,float * y)2754930cef6SMatthias Ringwald LC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def,
276*6897da5cSDirk Helbig     const struct lc3_complex *x, float *y)
2779a19cd78SMatthias Ringwald {
2789a19cd78SMatthias Ringwald     int n4 = def->n4, n8 = n4 >> 1;
2799a19cd78SMatthias Ringwald 
2809a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1;
2819a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1;
2829a19cd78SMatthias Ringwald 
2839a19cd78SMatthias Ringwald     float *y0 = y + n4, *y1 = y0;
2849a19cd78SMatthias Ringwald 
2859a19cd78SMatthias Ringwald     for ( ; y1 > y; x0++, x1--, w0++, w1--) {
2869a19cd78SMatthias Ringwald 
287*6897da5cSDirk Helbig         float u0 = x0->im * w0->im + x0->re * w0->re;
288*6897da5cSDirk Helbig         float u1 = x1->re * w1->im - x1->im * w1->re;
2899a19cd78SMatthias Ringwald 
290*6897da5cSDirk Helbig         float v0 = x0->re * w0->im - x0->im * w0->re;
291*6897da5cSDirk Helbig         float v1 = x1->im * w1->im + x1->re * w1->re;
2929a19cd78SMatthias Ringwald 
2939a19cd78SMatthias Ringwald         *(y0++) = u0;  *(y0++) = u1;
2949a19cd78SMatthias Ringwald         *(--y1) = v0;  *(--y1) = v1;
2959a19cd78SMatthias Ringwald     }
2969a19cd78SMatthias Ringwald }
2979a19cd78SMatthias Ringwald 
2989a19cd78SMatthias Ringwald /**
2999a19cd78SMatthias Ringwald  * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT
3009a19cd78SMatthias Ringwald  * def             Size and twiddles factors
3019a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
3029a19cd78SMatthias Ringwald  *
3034930cef6SMatthias Ringwald  * `x` and `y` can be the same buffer
3044930cef6SMatthias Ringwald  * The real and imaginary parts of `y` are swapped,
3054930cef6SMatthias Ringwald  * to operate on FFT instead of IFFT
3069a19cd78SMatthias Ringwald  */
imdct_pre_fft(const struct lc3_mdct_rot_def * def,const float * x,struct lc3_complex * y)3074930cef6SMatthias Ringwald LC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def,
3089a19cd78SMatthias Ringwald     const float *x, struct lc3_complex *y)
3099a19cd78SMatthias Ringwald {
3109a19cd78SMatthias Ringwald     int n4 = def->n4;
3119a19cd78SMatthias Ringwald 
3129a19cd78SMatthias Ringwald     const float *x0 = x, *x1 = x0 + 2*n4;
3139a19cd78SMatthias Ringwald 
3149a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
3159a19cd78SMatthias Ringwald     struct lc3_complex *y0 = y, *y1 = y0 + n4;
3169a19cd78SMatthias Ringwald 
3179a19cd78SMatthias Ringwald     while (x0 < x1) {
3189a19cd78SMatthias Ringwald         float u0 = *(x0++), u1 = *(--x1);
3199a19cd78SMatthias Ringwald         float v0 = *(x0++), v1 = *(--x1);
3209a19cd78SMatthias Ringwald         struct lc3_complex uw = *(w0++), vw = *(--w1);
3219a19cd78SMatthias Ringwald 
3224930cef6SMatthias Ringwald         (y0  )->re = - u0 * uw.re - u1 * uw.im;
3234930cef6SMatthias Ringwald         (y0++)->im = - u1 * uw.re + u0 * uw.im;
3249a19cd78SMatthias Ringwald 
3254930cef6SMatthias Ringwald         (--y1)->re = - v1 * vw.re - v0 * vw.im;
3264930cef6SMatthias Ringwald         (  y1)->im = - v0 * vw.re + v1 * vw.im;
3279a19cd78SMatthias Ringwald     }
3289a19cd78SMatthias Ringwald }
3299a19cd78SMatthias Ringwald 
3309a19cd78SMatthias Ringwald /**
3319a19cd78SMatthias Ringwald  * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points
3329a19cd78SMatthias Ringwald  * def             Size and twiddles factors
3339a19cd78SMatthias Ringwald  * x, y            Input and output coefficients
3349a19cd78SMatthias Ringwald  *
3359a19cd78SMatthias Ringwald  * `x` and y` can be the same buffer
3364930cef6SMatthias Ringwald  * The real and imaginary parts of `x` are swapped,
3374930cef6SMatthias Ringwald  * to operate on FFT instead of IFFT
3389a19cd78SMatthias Ringwald  */
imdct_post_fft(const struct lc3_mdct_rot_def * def,const struct lc3_complex * x,float * y)3394930cef6SMatthias Ringwald LC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def,
340*6897da5cSDirk Helbig     const struct lc3_complex *x, float *y)
3419a19cd78SMatthias Ringwald {
3429a19cd78SMatthias Ringwald     int n4 = def->n4;
3439a19cd78SMatthias Ringwald 
3449a19cd78SMatthias Ringwald     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
3459a19cd78SMatthias Ringwald     const struct lc3_complex *x0 = x, *x1 = x0 + n4;
3469a19cd78SMatthias Ringwald 
3479a19cd78SMatthias Ringwald     float *y0 = y, *y1 = y0 + 2*n4;
3489a19cd78SMatthias Ringwald 
3499a19cd78SMatthias Ringwald     while (x0 < x1) {
3509a19cd78SMatthias Ringwald         struct lc3_complex uz = *(x0++), vz = *(--x1);
3519a19cd78SMatthias Ringwald         struct lc3_complex uw = *(w0++), vw = *(--w1);
3529a19cd78SMatthias Ringwald 
353*6897da5cSDirk Helbig         *(y0++) = uz.re * uw.im - uz.im * uw.re;
354*6897da5cSDirk Helbig         *(--y1) = uz.re * uw.re + uz.im * uw.im;
3559a19cd78SMatthias Ringwald 
356*6897da5cSDirk Helbig         *(--y1) = vz.re * vw.im - vz.im * vw.re;
357*6897da5cSDirk Helbig         *(y0++) = vz.re * vw.re + vz.im * vw.im;
3589a19cd78SMatthias Ringwald     }
3599a19cd78SMatthias Ringwald }
3609a19cd78SMatthias Ringwald 
3619a19cd78SMatthias Ringwald /**
3629a19cd78SMatthias Ringwald  * Apply windowing of samples
3639a19cd78SMatthias Ringwald  * dt, sr          Duration and samplerate
3649a19cd78SMatthias Ringwald  * x, d            Middle half of IMDCT coefficients and delayed samples
3659a19cd78SMatthias Ringwald  * y, d            Output samples and delayed ones
3669a19cd78SMatthias Ringwald  */
imdct_window(enum lc3_dt dt,enum lc3_srate sr,const float * x,float * d,float * y)367*6897da5cSDirk Helbig LC3_HOT static void imdct_window(
368*6897da5cSDirk Helbig     enum lc3_dt dt, enum lc3_srate sr,
3699a19cd78SMatthias Ringwald     const float *x, float *d, float *y)
3709a19cd78SMatthias Ringwald {
3719a19cd78SMatthias Ringwald     /* The full MDCT coefficients is given by symmetry :
3729a19cd78SMatthias Ringwald      *   T[   0 ..  n/4-1] = -half[n/4-1 .. 0    ]
3739a19cd78SMatthias Ringwald      *   T[ n/4 ..  n/2-1] =  half[0     .. n/4-1]
3749a19cd78SMatthias Ringwald      *   T[ n/2 .. 3n/4-1] =  half[n/4   .. n/2-1]
3759a19cd78SMatthias Ringwald      *   T[3n/4 ..    n-1] =  half[n/2-1 .. n/4  ]  */
3769a19cd78SMatthias Ringwald 
377*6897da5cSDirk Helbig     const float *win = lc3_mdct_win[dt][sr];
378*6897da5cSDirk Helbig     int n4 = lc3_ns(dt, sr) >> 1, nd = lc3_nd(dt, sr);
379*6897da5cSDirk Helbig     const float *w2 = win, *w0 = w2 + 3*n4, *w1 = w0;
3809a19cd78SMatthias Ringwald 
3819a19cd78SMatthias Ringwald     const float *x0 = d + nd-n4, *x1 = x0;
3829a19cd78SMatthias Ringwald     float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d;
3839a19cd78SMatthias Ringwald 
3849a19cd78SMatthias Ringwald     while (y0 > y) {
3859a19cd78SMatthias Ringwald         *(--y0) = *(--x0) - *(x  ) * *(w1++);
3869a19cd78SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
3874930cef6SMatthias Ringwald 
3884930cef6SMatthias Ringwald         *(--y0) = *(--x0) - *(x  ) * *(w1++);
3894930cef6SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
3909a19cd78SMatthias Ringwald     }
3919a19cd78SMatthias Ringwald 
3929a19cd78SMatthias Ringwald     while (y1 < y + nd) {
3939a19cd78SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
3944930cef6SMatthias Ringwald         *(y1++) = *(x1++) + *(x++) * *(--w0);
3959a19cd78SMatthias Ringwald     }
3969a19cd78SMatthias Ringwald 
3979a19cd78SMatthias Ringwald     while (y1 < y + 2*n4) {
3989a19cd78SMatthias Ringwald         *(y1++) = *(x  ) * *(--w0);
3999a19cd78SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
4004930cef6SMatthias Ringwald 
4014930cef6SMatthias Ringwald         *(y1++) = *(x  ) * *(--w0);
4024930cef6SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
4039a19cd78SMatthias Ringwald     }
4049a19cd78SMatthias Ringwald 
4059a19cd78SMatthias Ringwald     while (y2 > y3) {
4069a19cd78SMatthias Ringwald         *(y3++) = *(x  ) * *(--w0);
4079a19cd78SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
4084930cef6SMatthias Ringwald 
4094930cef6SMatthias Ringwald         *(y3++) = *(x  ) * *(--w0);
4104930cef6SMatthias Ringwald         *(--y2) = *(x++) * *(w2++);
4119a19cd78SMatthias Ringwald     }
4129a19cd78SMatthias Ringwald }
4139a19cd78SMatthias Ringwald 
4149a19cd78SMatthias Ringwald /**
415*6897da5cSDirk Helbig  * Rescale samples
416*6897da5cSDirk Helbig  * x, n            Input and count of samples, scaled as output
417*6897da5cSDirk Helbig  * scale           Scale factor
418*6897da5cSDirk Helbig  */
rescale(float * x,int n,float f)419*6897da5cSDirk Helbig LC3_HOT static void rescale(float *x, int n, float f)
420*6897da5cSDirk Helbig {
421*6897da5cSDirk Helbig     for (int i = 0; i < (n >> 2); i++) {
422*6897da5cSDirk Helbig         *(x++) *= f; *(x++) *= f;
423*6897da5cSDirk Helbig         *(x++) *= f; *(x++) *= f;
424*6897da5cSDirk Helbig     }
425*6897da5cSDirk Helbig }
426*6897da5cSDirk Helbig 
427*6897da5cSDirk Helbig /**
4289a19cd78SMatthias Ringwald  * Forward MDCT transformation
4299a19cd78SMatthias Ringwald  */
lc3_mdct_forward(enum lc3_dt dt,enum lc3_srate sr,enum lc3_srate sr_dst,const float * x,float * d,float * y)430*6897da5cSDirk Helbig void lc3_mdct_forward(
431*6897da5cSDirk Helbig     enum lc3_dt dt, enum lc3_srate sr, enum lc3_srate sr_dst,
432*6897da5cSDirk Helbig     const float *x, float *d, float *y)
4339a19cd78SMatthias Ringwald {
4349a19cd78SMatthias Ringwald     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
435*6897da5cSDirk Helbig     int ns_dst = lc3_ns(dt, sr_dst);
436*6897da5cSDirk Helbig     int ns = lc3_ns(dt, sr);
4379a19cd78SMatthias Ringwald 
438*6897da5cSDirk Helbig     struct lc3_complex buffer[LC3_MAX_NS / 2];
4394930cef6SMatthias Ringwald     struct lc3_complex *z = (struct lc3_complex *)y;
4404930cef6SMatthias Ringwald     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
4419a19cd78SMatthias Ringwald 
4424930cef6SMatthias Ringwald     mdct_window(dt, sr, x, d, u.f);
4439a19cd78SMatthias Ringwald 
4449a19cd78SMatthias Ringwald     mdct_pre_fft(rot, u.f, u.z);
4454930cef6SMatthias Ringwald     u.z = fft(u.z, ns/2, u.z, z);
446*6897da5cSDirk Helbig     mdct_post_fft(rot, u.z, y);
447*6897da5cSDirk Helbig 
448*6897da5cSDirk Helbig     if (ns != ns_dst)
449*6897da5cSDirk Helbig         rescale(y, ns_dst, sqrtf((float)ns_dst / ns));
4509a19cd78SMatthias Ringwald }
4519a19cd78SMatthias Ringwald 
4529a19cd78SMatthias Ringwald /**
4539a19cd78SMatthias Ringwald  * Inverse MDCT transformation
4549a19cd78SMatthias Ringwald  */
lc3_mdct_inverse(enum lc3_dt dt,enum lc3_srate sr,enum lc3_srate sr_src,const float * x,float * d,float * y)455*6897da5cSDirk Helbig void lc3_mdct_inverse(
456*6897da5cSDirk Helbig     enum lc3_dt dt, enum lc3_srate sr, enum lc3_srate sr_src,
457*6897da5cSDirk Helbig     const float *x, float *d, float *y)
4589a19cd78SMatthias Ringwald {
4599a19cd78SMatthias Ringwald     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
460*6897da5cSDirk Helbig     int ns_src = lc3_ns(dt, sr_src);
461*6897da5cSDirk Helbig     int ns = lc3_ns(dt, sr);
4629a19cd78SMatthias Ringwald 
463*6897da5cSDirk Helbig     struct lc3_complex buffer[LC3_MAX_NS / 2];
4649a19cd78SMatthias Ringwald     struct lc3_complex *z = (struct lc3_complex *)y;
4659a19cd78SMatthias Ringwald     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
4669a19cd78SMatthias Ringwald 
4679a19cd78SMatthias Ringwald     imdct_pre_fft(rot, x, z);
4684930cef6SMatthias Ringwald     z = fft(z, ns/2, z, u.z);
469*6897da5cSDirk Helbig     imdct_post_fft(rot, z, u.f);
470*6897da5cSDirk Helbig 
471*6897da5cSDirk Helbig     if (ns != ns_src)
472*6897da5cSDirk Helbig         rescale(u.f, ns, sqrtf((float)ns / ns_src));
4739a19cd78SMatthias Ringwald 
4749a19cd78SMatthias Ringwald     imdct_window(dt, sr, u.f, d, y);
4759a19cd78SMatthias Ringwald }
476